
Merge pull request #162 from dora-rs/unify-nodes-and-operators

Redesign: Create a `dora-daemon` as a communication broker
Commit 37fc3729e3 · tag v0.2.0-candidate
Philipp Oppermann · committed 2 years ago
100 changed files with 5966 additions and 2147 deletions
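This merge replaces the direct zenoh/iceoryx pub-sub communication between nodes with a local `dora-daemon` process that brokers all messages over shared memory or TCP. On the API side, `DoraNode::init_from_env()` now returns a `(node, event stream)` pair, and a node consumes one merged stream of events (inputs, input-closed notices, stop requests, errors) instead of subscribing to per-input topics. Below is a minimal sketch of the new node-side loop, pieced together from the `apis/rust/node` and language-binding diffs further down; the `"echo"` output name is illustrative and exact signatures may differ slightly:

use dora_node_api::{DoraNode, Event};

fn main() -> eyre::Result<()> {
    // init_from_env now hands back the node plus its merged event stream
    let (mut node, mut events) = DoraNode::init_from_env()?;

    while let Some(event) = events.recv() {
        match event {
            Event::Input { data, .. } => {
                // inputs may arrive without a payload; fall back to an empty slice
                let data = data.as_deref().unwrap_or_default();
                // forward the bytes under a hypothetical `echo` output
                node.send_output("echo".to_owned().into(), Default::default(), data.len(), |out| {
                    out.copy_from_slice(data)
                })?;
            }
            Event::Stop => break,           // manual stop routed through the daemon
            Event::InputClosed { .. } => {} // one upstream output finished
            Event::Error(err) => eyre::bail!("event stream error: {err}"),
            _ => {}                         // stay forward-compatible with new event kinds
        }
    }
    Ok(())
}

The stream ends (`recv()` returns `None`) once all inputs are closed, so a plain `while let` loop shuts the node down cleanly.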
1. +5 -1 .github/workflows/ci-python.yml
2. +24 -28 .github/workflows/ci.yml
3. +0 -3 .github/workflows/release.yml
4. +242 -129 Cargo.lock
5. +18 -5 Cargo.toml
6. +5 -1 apis/c++/node/Cargo.toml
7. +48 -30 apis/c++/node/src/lib.rs
8. +24 -15 apis/c++/operator/src/lib.rs
9. +5 -3 apis/c/node/Cargo.toml
10. +15 -4 apis/c/node/node_api.h
11. +96 -47 apis/c/node/src/lib.rs
12. +3 -3 apis/c/operator/operator_api.h
13. +20 -5 apis/c/operator/operator_types.h
14. +48 -24 apis/python/node/src/lib.rs
15. +5 -6 apis/rust/node/Cargo.toml
16. +0 -249 apis/rust/node/src/communication.rs
17. +470 -0 apis/rust/node/src/daemon/mod.rs
18. +58 -0 apis/rust/node/src/daemon/tcp.rs
19. +79 -115 apis/rust/node/src/lib.rs
20. +9 -9 apis/rust/operator/macros/src/lib.rs
21. +10 -3 apis/rust/operator/src/lib.rs
22. +26 -9 apis/rust/operator/src/raw.rs
23. +16 -6 apis/rust/operator/types/src/lib.rs
24. +0 -1 binaries/cli/Cargo.toml
25. +60 -55 binaries/cli/src/check.rs
26. +21 -20 binaries/cli/src/main.rs
27. +9 -0 binaries/cli/src/template/python/operator/operator-template.py
28. +12 -6 binaries/cli/src/template/rust/node/main-template.rs
29. +9 -6 binaries/cli/src/template/rust/operator/lib-template.rs
30. +22 -45 binaries/cli/src/up.rs
31. +3 -3 binaries/coordinator/Cargo.toml
32. +107 -65 binaries/coordinator/src/control.rs
33. +339 -155 binaries/coordinator/src/lib.rs
34. +83 -0 binaries/coordinator/src/listener.rs
35. +0 -96 binaries/coordinator/src/run/custom.rs
36. +51 -106 binaries/coordinator/src/run/mod.rs
37. +0 -116 binaries/coordinator/src/run/runtime.rs
38. +23 -0 binaries/coordinator/src/tcp_utils.rs
39. +27 -0 binaries/daemon/Cargo.toml
40. +114 -0 binaries/daemon/src/coordinator.rs
41. +974 -0 binaries/daemon/src/lib.rs
42. +419 -0 binaries/daemon/src/listener/mod.rs
43. +75 -0 binaries/daemon/src/listener/shmem.rs
44. +85 -0 binaries/daemon/src/listener/tcp.rs
45. +60 -0 binaries/daemon/src/main.rs
46. +309 -0 binaries/daemon/src/shared_mem_handler.rs
47. +163 -0 binaries/daemon/src/spawn.rs
48. +23 -0 binaries/daemon/src/tcp_utils.rs
49. +4 -12 binaries/runtime/Cargo.toml
50. +214 -113 binaries/runtime/src/lib.rs
51. +127 -0 binaries/runtime/src/operator/channel.rs
52. +77 -37 binaries/runtime/src/operator/mod.rs
53. +64 -85 binaries/runtime/src/operator/python.rs
54. +113 -94 binaries/runtime/src/operator/shared_lib.rs
55. +19 -0 examples/benchmark/dataflow.yml
56. +6 -3 examples/benchmark/node/Cargo.toml
57. +72 -0 examples/benchmark/node/src/main.rs
58. +43 -0 examples/benchmark/run.rs
59. +4 -3 examples/benchmark/sink/Cargo.toml
60. +98 -0 examples/benchmark/sink/src/main.rs
61. +4 -2 examples/c++-dataflow/dataflow.yml
62. +46 -32 examples/c++-dataflow/node-c-api/main.cc
63. +24 -11 examples/c++-dataflow/node-rust-api/main.cc
64. +39 -28 examples/c++-dataflow/operator-c-api/operator.cc
65. +38 -24 examples/c++-dataflow/run.rs
66. +3 -3 examples/c-dataflow/dataflow.yml
67. +28 -13 examples/c-dataflow/node.c
68. +42 -31 examples/c-dataflow/operator.c
69. +21 -7 examples/c-dataflow/run.rs
70. +30 -16 examples/c-dataflow/sink.c
71. +0 -26 examples/iceoryx/dataflow.yml
72. +0 -34 examples/iceoryx/node/src/main.rs
73. +0 -13 examples/iceoryx/operator/Cargo.toml
74. +0 -47 examples/iceoryx/operator/src/lib.rs
75. +0 -33 examples/iceoryx/run.rs
76. +0 -28 examples/iceoryx/sink/src/main.rs
77. +4 -4 examples/python-dataflow/dataflow.yml
78. +4 -4 examples/python-dataflow/dataflow_without_webcam.yml
79. +10 -5 examples/python-dataflow/no_webcam.py
80. +27 -30 examples/python-dataflow/object_detection.py
81. +26 -5 examples/python-dataflow/plot.py
82. +12 -1 examples/python-dataflow/run.rs
83. +3 -1 examples/python-dataflow/run.sh
84. +12 -4 examples/python-dataflow/webcam.py
85. +1 -0 examples/python-operator-dataflow/.gitignore
86. +33 -0 examples/python-operator-dataflow/README.md
87. +27 -0 examples/python-operator-dataflow/dataflow.yml
88. +27 -0 examples/python-operator-dataflow/dataflow_without_webcam.yml
89. +31 -0 examples/python-operator-dataflow/no_webcam.py
90. +51 -0 examples/python-operator-dataflow/object_detection.py
91. +104 -0 examples/python-operator-dataflow/plot.py
92. +45 -0 examples/python-operator-dataflow/requirements.txt
93. +46 -0 examples/python-operator-dataflow/run.rs
94. +15 -0 examples/python-operator-dataflow/run.sh
95. +82 -0 examples/python-operator-dataflow/utils.py
96. +31 -0 examples/python-operator-dataflow/webcam.py
97. +16 -6 examples/rust-dataflow-url/run.rs
98. +3 -1 examples/rust-dataflow/dataflow.yml
99. +1 -1 examples/rust-dataflow/node/Cargo.toml
100. +25 -21 examples/rust-dataflow/node/src/main.rs

+5 -1 .github/workflows/ci-python.yml

@@ -7,6 +7,7 @@ on:
- apis/python/**
- binaries/runtime/**
pull_request:
workflow_dispatch:

jobs:
examples:
@@ -20,7 +21,7 @@ jobs:
sudo apt-get install -y libacl1-dev
- uses: actions/setup-python@v2
with:
python-version: 3.8
python-version: "3.10"

- uses: r7kamura/rust-problem-matchers@v1.1.0
- run: cargo --version --verbose
@@ -28,3 +29,6 @@ jobs:

- name: "Python Dataflow example"
run: cargo run --example python-dataflow

- name: "Python Operator Dataflow example"
run: cargo run --example python-operator-dataflow

+24 -28 .github/workflows/ci.yml

@@ -60,19 +60,17 @@ jobs:
- name: "Rust Dataflow example"
timeout-minutes: 30
run: cargo run --example rust-dataflow

- name: "Benchmark example"
timeout-minutes: 30
run: cargo run --example benchmark --release

- name: "C Dataflow example"
timeout-minutes: 15
run: cargo run --example c-dataflow
- name: "C++ Dataflow example"
timeout-minutes: 15
run: cargo run --example cxx-dataflow
- name: "Start RouDi (iceoryx)"
if: runner.os != 'Windows'
run: find target -type f -wholename "*/iceoryx-install/bin/iox-roudi" -exec {} \; &
- name: "Rust iceoryx example"
if: runner.os != 'Windows'
timeout-minutes: 30
run: cargo run --example iceoryx

CLI:
name: "CLI Test"
@@ -90,36 +88,31 @@ jobs:
export DEBIAN_FRONTEND=noninteractive
sudo apt-get install -y libacl1-dev

- uses: r7kamura/rust-problem-matchers@v1.1.0
- run: cargo --version --verbose
- uses: Swatinem/rust-cache@v2

- name: "Build cli and binaries"
timeout-minutes: 30
run: |
cargo install --path binaries/coordinator
cargo install --path binaries/daemon
cargo install --path binaries/runtime
cargo install --path binaries/cli

- name: "Start dora-coordinator"
run: |
dora-coordinator &

- name: "Test dora `list"
run: dora-cli list

- name: "Test new command"
run: |
dora-cli new test_project

- name: "Test start and stop command"
- name: "Test CLI"
timeout-minutes: 30
run: |
cd test_project
cargo build --all
UUID=$(dora-cli start dataflow.yml)
sleep 10
dora-cli stop $UUID
cd ..

- name: "Test dora `destroy"
run: dora-cli destroy
dora-cli up
dora-cli list
dora-cli new test_project
cd test_project
cargo build --all --config "patch.'https://github.com/dora-rs/dora.git'.dora-node-api.path=\"../apis/rust/node\"" --config "patch.'https://github.com/dora-rs/dora.git'.dora-operator-api.path=\"../apis/rust/operator\""
UUID=$(dora-cli start dataflow.yml)
sleep 10
dora-cli stop $UUID
cd ..
dora-cli destroy

examples-remote:
name: "Examples (Remote)"
@@ -138,6 +131,7 @@ jobs:
- uses: Swatinem/rust-cache@v2

- name: "Remote Rust Dataflow example"
if: false # skip this example for now until we uploaded new test nodes
timeout-minutes: 30
run: cargo run --example rust-dataflow-url

@@ -159,8 +153,10 @@ jobs:
run: cargo clippy --all
- name: "Clippy (tracing feature)"
run: cargo clippy --all --features tracing
if: false # only the dora-runtime has this feature, but it is currently commented out
- name: "Clippy (metrics feature)"
run: cargo clippy --all --features metrics
if: false # only the dora-runtime has this feature, but it is currently commented out

rustfmt:
name: "Formatting"


+0 -3 .github/workflows/release.yml

@@ -41,9 +41,6 @@ jobs:
cp target/release/dora-runtime archive
cp target/release/dora-coordinator archive
cp target/release/dora-cli archive/dora
mkdir archive/iceoryx
find target -type f -wholename "*/iceoryx-install/bin/iox-roudi" -exec cp {} archive/iceoryx \;
find target -type f -wholename "*/iceoryx-install/share/doc/iceoryx_posh/LICENSE" -exec cp {} archive/iceoryx \;
cd archive
zip -r ../archive.zip .
cd ..


+242 -129 Cargo.lock

@@ -219,9 +219,9 @@ checksum = "30696a84d817107fc028e049980e09d5e140e8da8f1caeb17e8e950658a3cea9"

[[package]]
name = "async-trait"
version = "0.1.53"
version = "0.1.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600"
checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2"
dependencies = [
"proc-macro2",
"quote",
@@ -272,6 +272,29 @@ version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6b4d9b1225d28d360ec6a231d65af1fd99a2a095154c8040689617290569c5c"

[[package]]
name = "benchmark-example-node"
version = "0.1.3"
dependencies = [
"dora-node-api",
"eyre",
"futures",
"rand",
"tokio",
"tracing",
"tracing-subscriber",
]

[[package]]
name = "benchmark-example-sink"
version = "0.1.3"
dependencies = [
"dora-node-api",
"eyre",
"tracing",
"tracing-subscriber",
]

[[package]]
name = "bincode"
version = "1.3.3"
@@ -287,6 +310,18 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]

[[package]]
name = "block-buffer"
version = "0.7.3"
@@ -745,6 +780,16 @@ dependencies = [
"syn",
]

[[package]]
name = "ctrlc"
version = "3.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbcf33c2a618cbe41ee43ae6e9f2e48368cd9f9db2896f10167d8d762679f639"
dependencies = [
"nix 0.26.2",
"windows-sys 0.45.0",
]

[[package]]
name = "cty"
version = "0.2.2"
@@ -753,9 +798,9 @@ checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35"

[[package]]
name = "cxx"
version = "1.0.73"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "873c2e83af70859af2aaecd1f5d862f3790b747b1f4f50fb45a931d000ac0422"
checksum = "5add3fc1717409d029b20c5b6903fc0c0b02fa6741d820054f4a2efa5e5816fd"
dependencies = [
"cc",
"cxxbridge-flags",
@@ -780,15 +825,15 @@ dependencies = [

[[package]]
name = "cxxbridge-flags"
version = "1.0.73"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46b787c15af80277db5c88c6ac6c502ae545e622f010e06f95e540d34931acf"
checksum = "69a3e162fde4e594ed2b07d0f83c6c67b745e7f28ce58c6df5e6b6bef99dfb59"

[[package]]
name = "cxxbridge-macro"
version = "1.0.73"
version = "1.0.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ba3f3a7efa46626878fb5d324fabca4d19d2956b6ae97ce43044ef4515f5abc"
checksum = "3e7e2adeb6a0d4a282e581096b06e1791532b7d576dcde5ccd9382acf55db8e6"
dependencies = [
"proc-macro2",
"quote",
@@ -887,7 +932,6 @@ dependencies = [
"serde",
"serde_json",
"serde_yaml 0.9.11",
"sysinfo 0.26.6",
"tempfile",
"termcolor",
"uuid 1.2.1",
@@ -901,13 +945,13 @@ dependencies = [
"bincode",
"clap 3.2.20",
"communication-layer-request-reply",
"ctrlc",
"dora-core",
"dora-download",
"dora-message",
"dora-node-api",
"eyre",
"futures",
"futures-concurrency 5.0.1",
"futures-concurrency",
"rand",
"serde",
"serde_json",
@@ -928,16 +972,43 @@ dependencies = [
name = "dora-core"
version = "0.1.3"
dependencies = [
"dora-message",
"eyre",
"once_cell",
"serde",
"serde-with-expand-env",
"serde_yaml 0.9.11",
"tracing",
"uuid 1.2.1",
"which",
"zenoh-config",
]

[[package]]
name = "dora-daemon"
version = "0.1.0"
dependencies = [
"async-trait",
"bincode",
"clap 3.2.20",
"ctrlc",
"dora-core",
"dora-download",
"eyre",
"flume",
"futures",
"futures-concurrency",
"serde",
"serde_json",
"serde_yaml 0.8.23",
"shared-memory-server",
"tokio",
"tokio-stream",
"tracing",
"tracing-subscriber",
"uuid 1.2.1",
]

[[package]]
name = "dora-download"
version = "0.1.3"
@@ -953,10 +1024,15 @@ dependencies = [
name = "dora-examples"
version = "0.0.0"
dependencies = [
"dora-coordinator",
"dora-core",
"dora-daemon",
"dunce",
"eyre",
"serde_yaml 0.8.23",
"tokio",
"tracing",
"tracing-subscriber",
"uuid 1.2.1",
]

[[package]]
@@ -965,6 +1041,7 @@ version = "0.1.3"
dependencies = [
"capnp",
"capnpc",
"serde",
"uhlc 0.5.1",
]

@@ -983,15 +1060,16 @@ dependencies = [
name = "dora-node-api"
version = "0.1.3"
dependencies = [
"bincode",
"capnp",
"communication-layer-pub-sub",
"dora-core",
"dora-message",
"eyre",
"flume",
"once_cell",
"serde",
"serde_json",
"serde_yaml 0.8.23",
"shared-memory-server",
"thiserror",
"tokio",
"tracing",
@@ -1093,20 +1171,17 @@ dependencies = [
name = "dora-runtime"
version = "0.1.3"
dependencies = [
"clap 3.2.20",
"dora-core",
"dora-download",
"dora-message",
"dora-metrics",
"dora-node-api",
"dora-operator-api-python",
"dora-operator-api-types",
"dora-tracing",
"eyre",
"fern",
"flume",
"futures",
"futures-concurrency 2.0.3",
"futures-concurrency",
"libloading",
"opentelemetry",
"opentelemetry-system-metrics",
@@ -1116,8 +1191,6 @@ dependencies = [
"tokio-stream",
"tracing",
"tracing-subscriber",
"zenoh",
"zenoh-config",
]

[[package]]
@@ -1200,15 +1273,6 @@ dependencies = [
"instant",
]

[[package]]
name = "fern"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bdd7b0849075e79ee9a1836df22c717d1eba30451796fdc631b04565dd11e2a"
dependencies = [
"log",
]

[[package]]
name = "fixedbitset"
version = "0.4.1"
@@ -1269,6 +1333,12 @@ dependencies = [
"percent-encoding",
]

[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"

[[package]]
name = "futures"
version = "0.3.25"
@@ -1296,21 +1366,11 @@ dependencies = [

[[package]]
name = "futures-concurrency"
version = "2.0.3"
version = "7.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e98b7b5aedee7c34a5cfb1ee1681af8faf46e2f30c0b8af5ea08eba517d61c"
dependencies = [
"async-trait",
"futures-core",
"pin-project",
]

[[package]]
name = "futures-concurrency"
version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "407ed2aa475d777e35fb167144b63babd0377b2f9a528ae3ec4bec94f1ce1f1a"
checksum = "e06f199437c8a435c12ad153c5a1f4e131871cf6f6025585bb15e8cbb414f0dc"
dependencies = [
"bitvec",
"futures-core",
"pin-project",
]
@@ -1649,32 +1709,6 @@ dependencies = [
"tokio-native-tls",
]

[[package]]
name = "iceoryx-example-node"
version = "0.1.3"
dependencies = [
"dora-node-api",
"eyre",
"rand",
]

[[package]]
name = "iceoryx-example-operator"
version = "0.1.3"
dependencies = [
"dora-operator-api",
]

[[package]]
name = "iceoryx-example-sink"
version = "0.1.3"
dependencies = [
"dora-node-api",
"eyre",
"futures",
"tokio",
]

[[package]]
name = "iceoryx-rs"
version = "0.1.0"
@@ -1879,9 +1913,9 @@ dependencies = [

[[package]]
name = "libc"
version = "0.2.121"
version = "0.2.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"

[[package]]
name = "libloading"
@@ -2189,6 +2223,18 @@ dependencies = [
"memoffset",
]

[[package]]
name = "nix"
version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
dependencies = [
"bitflags",
"cfg-if",
"libc",
"static_assertions",
]

[[package]]
name = "ntapi"
version = "0.3.7"
@@ -2199,11 +2245,12 @@ dependencies = [
]

[[package]]
name = "ntapi"
version = "0.4.0"
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc51db7b362b205941f71232e56c625156eb9a929f8cf74a428fd5bc094a4afc"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]

@@ -2420,7 +2467,7 @@ checksum = "a848fb2d43cc8e5adabdedc6b37a88b45653d3a23b000a3d047e6953d5af42ea"
dependencies = [
"indexmap",
"opentelemetry",
"sysinfo 0.24.5",
"sysinfo",
]

[[package]]
@@ -2447,6 +2494,12 @@ version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"

[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"

[[package]]
name = "parking"
version = "2.0.0"
@@ -2761,9 +2814,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"

[[package]]
name = "proc-macro2"
version = "1.0.43"
version = "1.0.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2"
dependencies = [
"unicode-ident",
]
@@ -2943,6 +2996,12 @@ dependencies = [
"proc-macro2",
]

[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"

[[package]]
name = "rand"
version = "0.8.5"
@@ -2982,6 +3041,18 @@ dependencies = [
"cty",
]

[[package]]
name = "raw_sync"
version = "0.1.5"
source = "git+https://github.com/cameronelliott/raw_sync-rs.git#b1d6e16381b498fe618e5c1a1d1f2b1d2e6ef019"
dependencies = [
"cfg-if",
"libc",
"nix 0.23.1",
"rand",
"winapi",
]

[[package]]
name = "rayon"
version = "1.5.1"
@@ -3360,9 +3431,9 @@ checksum = "d65bd28f48be7196d222d95b9243287f48d27aca604e08497513019ff0502cc4"

[[package]]
name = "serde"
version = "1.0.144"
version = "1.0.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860"
checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
dependencies = [
"serde_derive",
]
@@ -3379,9 +3450,9 @@ dependencies = [

[[package]]
name = "serde_derive"
version = "1.0.144"
version = "1.0.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00"
checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
dependencies = [
"proc-macro2",
"quote",
@@ -3390,9 +3461,9 @@ dependencies = [

[[package]]
name = "serde_json"
version = "1.0.86"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41feea4228a6f1cd09ec7a3593a682276702cd67b5273544757dae23c096f074"
checksum = "020ff22c755c2ed3f8cf162dbb41a7268d934702f3ed3631656ea597e08fc3db"
dependencies = [
"itoa",
"ryu",
@@ -3469,6 +3540,18 @@ dependencies = [
"lazy_static",
]

[[package]]
name = "shared-memory-server"
version = "0.1.3"
dependencies = [
"bincode",
"eyre",
"raw_sync",
"serde",
"shared_memory",
"tracing",
]

[[package]]
name = "shared_memory"
version = "0.12.0"
@@ -3529,9 +3612,9 @@ checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5"

[[package]]
name = "smallvec"
version = "1.8.0"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"

[[package]]
name = "socket2"
@@ -3567,6 +3650,12 @@ dependencies = [
"der",
]

[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"

[[package]]
name = "stop-token"
version = "0.7.0"
@@ -3599,9 +3688,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"

[[package]]
name = "syn"
version = "1.0.99"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
dependencies = [
"proc-macro2",
"quote",
@@ -3629,26 +3718,17 @@ dependencies = [
"cfg-if",
"core-foundation-sys",
"libc",
"ntapi 0.3.7",
"ntapi",
"once_cell",
"rayon",
"winapi",
]

[[package]]
name = "sysinfo"
version = "0.26.6"
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6d0dedf2e65d25b365c588382be9dc3a3ee4b0ed792366cf722d174c359d948"
dependencies = [
"cfg-if",
"core-foundation-sys",
"libc",
"ntapi 0.4.0",
"once_cell",
"rayon",
"winapi",
]
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"

[[package]]
name = "target-lexicon"
@@ -3823,9 +3903,9 @@ dependencies = [

[[package]]
name = "tokio-stream"
version = "0.1.9"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df54d54117d6fdc4e4fea40fe1e4e566b3505700e148a6827e59b34b0d2600d9"
checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce"
dependencies = [
"futures-core",
"pin-project-lite",
@@ -3937,9 +4017,9 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"

[[package]]
name = "tracing"
version = "0.1.36"
version = "0.1.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307"
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
dependencies = [
"cfg-if",
"log",
@@ -3950,9 +4030,9 @@ dependencies = [

[[package]]
name = "tracing-attributes"
version = "0.1.22"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11c75893af559bc8e10716548bdef5cb2b983f8e637db9d0e15126b61b484ee2"
checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a"
dependencies = [
"proc-macro2",
"quote",
@@ -3961,9 +4041,9 @@ dependencies = [

[[package]]
name = "tracing-core"
version = "0.1.29"
version = "0.1.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7"
checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a"
dependencies = [
"once_cell",
"valuable",
@@ -3992,11 +4072,11 @@ dependencies = [

[[package]]
name = "tracing-subscriber"
version = "0.3.15"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60db860322da191b40952ad9affe65ea23e7dd6a5c442c2c42865810c6ab8e6b"
checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70"
dependencies = [
"ansi_term",
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
@@ -4485,19 +4565,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc 0.42.0",
"windows_i686_gnu 0.42.0",
"windows_i686_msvc 0.42.0",
"windows_x86_64_gnu 0.42.0",
"windows_aarch64_msvc 0.42.1",
"windows_i686_gnu 0.42.1",
"windows_i686_msvc 0.42.1",
"windows_x86_64_gnu 0.42.1",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc 0.42.1",
]

[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
]

[[package]]
name = "windows-targets"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc 0.42.1",
"windows_i686_gnu 0.42.1",
"windows_i686_msvc 0.42.1",
"windows_x86_64_gnu 0.42.1",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc 0.42.0",
"windows_x86_64_msvc 0.42.1",
]

[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e"
checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608"

[[package]]
name = "windows_aarch64_msvc"
@@ -4513,9 +4617,9 @@ checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"

[[package]]
name = "windows_aarch64_msvc"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4"
checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7"

[[package]]
name = "windows_i686_gnu"
@@ -4531,9 +4635,9 @@ checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"

[[package]]
name = "windows_i686_gnu"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7"
checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640"

[[package]]
name = "windows_i686_msvc"
@@ -4549,9 +4653,9 @@ checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"

[[package]]
name = "windows_i686_msvc"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246"
checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605"

[[package]]
name = "windows_x86_64_gnu"
@@ -4567,15 +4671,15 @@ checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"

[[package]]
name = "windows_x86_64_gnu"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed"
checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45"

[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028"
checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463"

[[package]]
name = "windows_x86_64_msvc"
@@ -4591,9 +4695,9 @@ checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "windows_x86_64_msvc"
version = "0.42.0"
version = "0.42.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5"
checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd"

[[package]]
name = "winreg"
@@ -4624,6 +4728,15 @@ dependencies = [
"syn",
]

[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]

[[package]]
name = "yaml-rust"
version = "0.4.5"


+18 -5 Cargo.toml

@@ -7,12 +7,16 @@ members = [
"apis/rust/*",
"apis/rust/operator/macros",
"apis/rust/operator/types",
"binaries/*",
"binaries/cli",
"binaries/coordinator",
"binaries/daemon",
"binaries/runtime",
"examples/rust-dataflow/*",
"examples/iceoryx/*",
"examples/benchmark/*",
"libraries/communication-layer/*",
"libraries/core",
"libraries/message",
"libraries/shared-memory-server",
"libraries/extensions/download",
"libraries/extensions/telemetry/*",
"libraries/extensions/zenoh-logger",
@@ -36,8 +40,13 @@ license = "Apache-2.0"
[dev-dependencies]
eyre = "0.6.8"
tokio = "1.24.2"
dora-coordinator = { path = "binaries/coordinator" }
dora-daemon = { path = "binaries/daemon" }
dora-core = { path = "libraries/core" }
dunce = "1.0.2"
serde_yaml = "0.8.23"
uuid = { version = "1.2.1", features = ["v4", "serde"] }
tracing = "0.1.36"
tracing-subscriber = "0.3.15"

[[example]]
name = "c-dataflow"
@@ -60,5 +69,9 @@ name = "python-dataflow"
path = "examples/python-dataflow/run.rs"

[[example]]
name = "iceoryx"
path = "examples/iceoryx/run.rs"
name = "python-operator-dataflow"
path = "examples/python-operator-dataflow/run.rs"

[[example]]
name = "benchmark"
path = "examples/benchmark/run.rs"

+5 -1 apis/c++/node/Cargo.toml

@@ -8,9 +8,13 @@ edition = "2021"
[lib]
crate-type = ["staticlib"]

[features]
default = ["tracing-subscriber"]
tracing-subscriber = ["dora-node-api/tracing-subscriber"]

[dependencies]
cxx = "1.0.73"
dora-node-api = { workspace = true, features = ["zenoh"] }
dora-node-api = { workspace = true }
eyre = "0.6.8"

[build-dependencies]


+48 -30 apis/c++/node/src/lib.rs

@@ -1,14 +1,24 @@
use dora_node_api::{self, Input, Receiver};
use dora_node_api::{self, Event, EventStream};
use eyre::bail;

#[cxx::bridge]
#[allow(clippy::needless_lifetimes)]
mod ffi {
struct DoraNode {
inputs: Box<Inputs>,
events: Box<Events>,
send_output: Box<OutputSender>,
}

pub enum DoraEventType {
Stop,
Input,
InputClosed,
Error,
Unknown,
AllInputsClosed,
}

struct DoraInput {
end_of_input: bool,
id: String,
data: Vec<u8>,
}
@@ -18,12 +28,15 @@ mod ffi {
}

extern "Rust" {
type Inputs;
type Events;
type OutputSender;
type DoraEvent<'a>;

fn init_dora_node() -> Result<DoraNode>;
fn free_dora_node(node: DoraNode);
fn next_input(inputs: &mut Box<Inputs>) -> DoraInput;

fn next_event(inputs: &mut Box<Events>) -> Box<DoraEvent<'_>>;
fn event_type(event: &Box<DoraEvent>) -> DoraEventType;
fn event_as_input(event: Box<DoraEvent>) -> Result<DoraInput>;
fn send_output(
output_sender: &mut Box<OutputSender>,
id: String,
@@ -33,48 +46,53 @@ mod ffi {
}

fn init_dora_node() -> eyre::Result<ffi::DoraNode> {
let mut node = dora_node_api::DoraNode::init_from_env()?;
let input_stream = node.inputs()?;
let inputs = Inputs(input_stream);
let (node, events) = dora_node_api::DoraNode::init_from_env()?;
let events = Events(events);
let send_output = OutputSender(node);

Ok(ffi::DoraNode {
inputs: Box::new(inputs),
events: Box::new(events),
send_output: Box::new(send_output),
})
}

fn free_dora_node(node: ffi::DoraNode) {
let _ = node;
pub struct Events(EventStream);

fn next_event(events: &mut Box<Events>) -> Box<DoraEvent> {
Box::new(DoraEvent(events.0.recv()))
}

pub struct Inputs(Receiver<Input>);

fn next_input(inputs: &mut Box<Inputs>) -> ffi::DoraInput {
match inputs.0.recv() {
Ok(input) => {
let id = input.id.clone().into();
let data = input.data();
ffi::DoraInput {
end_of_input: false,
id,
data: data.into_owned(),
}
}
Err(_) => ffi::DoraInput {
end_of_input: true,
id: String::new(),
data: Vec::new(),
pub struct DoraEvent<'a>(Option<Event<'a>>);

fn event_type(event: &DoraEvent) -> ffi::DoraEventType {
match &event.0 {
Some(event) => match event {
Event::Stop => ffi::DoraEventType::Stop,
Event::Input { .. } => ffi::DoraEventType::Input,
Event::InputClosed { .. } => ffi::DoraEventType::InputClosed,
Event::Error(_) => ffi::DoraEventType::Error,
_ => ffi::DoraEventType::Unknown,
},
None => ffi::DoraEventType::AllInputsClosed,
}
}

fn event_as_input(event: Box<DoraEvent>) -> eyre::Result<ffi::DoraInput> {
let Some(Event::Input { id, metadata: _, data }) = event.0 else {
bail!("not an input event");
};
Ok(ffi::DoraInput {
id: id.into(),
data: data.map(|d| d.to_owned()).unwrap_or_default(),
})
}

pub struct OutputSender(dora_node_api::DoraNode);

fn send_output(sender: &mut Box<OutputSender>, id: String, data: &[u8]) -> ffi::DoraResult {
let result = sender
.0
.send_output(&id.into(), Default::default(), data.len(), |out| {
.send_output(id.into(), Default::default(), data.len(), |out| {
out.copy_from_slice(data)
});
let error = match result {


+24 -15 apis/c++/operator/src/lib.rs

@@ -1,7 +1,9 @@
#![cfg(not(test))]
#![warn(unsafe_op_in_unsafe_fn)]

use dora_operator_api::{self, register_operator, DoraOperator, DoraOutputSender, DoraStatus};
use dora_operator_api::{
self, register_operator, DoraOperator, DoraOutputSender, DoraStatus, Event,
};
use ffi::DoraSendOutputResult;

#[cxx::bridge]
@@ -64,23 +66,30 @@ impl Default for OperatorWrapper {
}

impl DoraOperator for OperatorWrapper {
fn on_input(
fn on_event(
&mut self,
id: &str,
data: &[u8],
event: &Event,
output_sender: &mut DoraOutputSender,
) -> Result<DoraStatus, std::string::String> {
let operator = self.operator.as_mut().unwrap();
let mut output_sender = OutputSender(output_sender);

let result = ffi::on_input(operator, id, data, &mut output_sender);
if result.error.is_empty() {
Ok(match result.stop {
false => DoraStatus::Continue,
true => DoraStatus::Stop,
})
} else {
Err(result.error)
match event {
Event::Input { id, data } => {
let operator = self.operator.as_mut().unwrap();
let mut output_sender = OutputSender(output_sender);

let result = ffi::on_input(operator, id, data, &mut output_sender);
if result.error.is_empty() {
Ok(match result.stop {
false => DoraStatus::Continue,
true => DoraStatus::Stop,
})
} else {
Err(result.error)
}
}
_ => {
// ignore other events for now
Ok(DoraStatus::Continue)
}
}
}
}
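On the operator side, the trait hook changes from `on_input` to `on_event`, as the wrapper above shows. Here is a hedged sketch of a minimal Rust operator against the new trait; the `on_event` signature and the `Event::Input { id, data }` shape are taken from the diff above, while the `Counter` type and the `DoraOutputSender::send(id, data)` call are illustrative assumptions:

use dora_operator_api::{register_operator, DoraOperator, DoraOutputSender, DoraStatus, Event};

register_operator!(Counter);

#[derive(Debug, Default)]
struct Counter {
    received: u64,
}

impl DoraOperator for Counter {
    fn on_event(
        &mut self,
        event: &Event,
        output_sender: &mut DoraOutputSender,
    ) -> Result<DoraStatus, String> {
        match event {
            Event::Input { id: _, data: _ } => {
                self.received += 1;
                // assumed helper: send(id, Vec<u8>) -> Result<_, impl Display>
                output_sender
                    .send("count".to_owned().into(), self.received.to_le_bytes().to_vec())
                    .map_err(|e| e.to_string())?;
                Ok(DoraStatus::Continue)
            }
            // stop/input-closed and future event kinds: keep running for now
            _ => Ok(DoraStatus::Continue),
        }
    }
}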

+5 -3 apis/c/node/Cargo.toml

@@ -9,12 +9,14 @@ license = "Apache-2.0"
[lib]
crate-type = ["staticlib"]

[features]
default = ["tracing-subscriber"]
tracing-subscriber = ["dora-node-api/tracing-subscriber"]

[dependencies]
eyre = "0.6.8"
flume = "0.10.14"
tracing = "0.1.33"

[dependencies.dora-node-api]
default-features = false
features = ["zenoh"]
path = "../../rust/node"
workspace = true

+15 -4 apis/c/node/node_api.h

@@ -3,9 +3,20 @@
void *init_dora_context_from_env();
void free_dora_context(void *dora_context);

void *dora_next_input(void *dora_context);
void read_dora_input_id(void *dora_input, char **out_ptr, size_t *out_len);
void read_dora_input_data(void *dora_input, char **out_ptr, size_t *out_len);
void free_dora_input(void *dora_input);
void *dora_next_event(void *dora_context);
void free_dora_event(void *dora_event);

enum DoraEventType
{
DoraEventType_Stop,
DoraEventType_Input,
DoraEventType_InputClosed,
DoraEventType_Error,
DoraEventType_Unknown,
};
enum DoraEventType read_dora_event_type(void *dora_event);

void read_dora_input_id(void *dora_event, char **out_ptr, size_t *out_len);
void read_dora_input_data(void *dora_event, char **out_ptr, size_t *out_len);

int dora_send_output(void *dora_context, char *id_ptr, size_t id_len, char *data_ptr, size_t data_len);

+96 -47 apis/c/node/src/lib.rs

@@ -1,12 +1,12 @@
#![deny(unsafe_op_in_unsafe_fn)]

use dora_node_api::{DoraNode, Input};
use dora_node_api::{DoraNode, Event, EventStream};
use eyre::Context;
use std::{ffi::c_void, ptr, slice};

struct DoraContext {
node: &'static mut DoraNode,
inputs: flume::Receiver<Input>,
events: EventStream,
}

/// Initializes a dora context from the environment variables that were set by
@@ -21,10 +21,9 @@ struct DoraContext {
#[no_mangle]
pub extern "C" fn init_dora_context_from_env() -> *mut c_void {
let context = || {
let node = DoraNode::init_from_env()?;
let (node, events) = DoraNode::init_from_env()?;
let node = Box::leak(Box::new(node));
let inputs = node.inputs()?;
Result::<_, eyre::Report>::Ok(DoraContext { node, inputs })
Result::<_, eyre::Report>::Ok(DoraContext { node, events })
};
let context = match context().context("failed to initialize node") {
Ok(n) => n,
@@ -54,15 +53,15 @@ pub unsafe extern "C" fn free_dora_context(context: *mut c_void) {
let _ = unsafe { Box::from_raw(node as *const DoraNode as *mut DoraNode) };
}

/// Waits for the next incoming input for the node.
/// Waits for the next incoming event for the node.
///
/// Returns a pointer to the input on success. This pointer must not be used
/// directly. Instead, use the `read_dora_input_*` functions to read out the
/// ID and data of the input. When the input is not needed anymore, use
/// [`free_dora_input`] to free it again.
/// Returns a pointer to the event on success. This pointer must not be used
/// directly. Instead, use the `read_dora_event_*` functions to read out the
/// type and payload of the event. When the event is not needed anymore, use
/// [`free_dora_event`] to free it again.
///
/// Returns a null pointer when all input streams were closed. This means that
/// no more input will be available. Nodes typically react by stopping.
/// Returns a null pointer when all event streams were closed. This means that
/// no more event will be available. Nodes typically react by stopping.
///
/// ## Safety
///
@@ -70,83 +69,133 @@ pub unsafe extern "C" fn free_dora_context(context: *mut c_void) {
/// [`init_dora_context_from_env`]. The context must be still valid, i.e., not
/// freed yet.
#[no_mangle]
pub unsafe extern "C" fn dora_next_input(context: *mut c_void) -> *mut c_void {
pub unsafe extern "C" fn dora_next_event(context: *mut c_void) -> *mut c_void {
let context: &mut DoraContext = unsafe { &mut *context.cast() };
match context.inputs.recv() {
Ok(input) => Box::into_raw(Box::new(input)).cast(),
Err(flume::RecvError::Disconnected) => ptr::null_mut(),
match context.events.recv() {
Some(event) => Box::into_raw(Box::new(event)).cast(),
None => ptr::null_mut(),
}
}

/// Reads out the ID of the given input.
/// Reads out the type of the given event.
///
/// ## Safety
///
/// The `event` argument must be a dora event received through
/// [`dora_next_event`]. The event must be still valid, i.e., not
/// freed yet.
#[no_mangle]
pub unsafe extern "C" fn read_dora_event_type(event: *const ()) -> EventType {
let event: &Event = unsafe { &*event.cast() };
match event {
Event::Stop => EventType::Stop,
Event::Input { .. } => EventType::Input,
Event::InputClosed { .. } => EventType::InputClosed,
Event::Error(_) => EventType::Error,
_ => EventType::Unknown,
}
}

#[repr(C)]
pub enum EventType {
Stop,
Input,
InputClosed,
Error,
Unknown,
}

/// Reads out the ID of the given input event.
///
/// Writes the `out_ptr` and `out_len` with the start pointer and length of the
/// ID string of the input. The ID is guaranteed to be valid UTF-8.
///
/// Writes a null pointer and length `0` if the given event is not an input event.
///
/// ## Safety
///
/// The `input` argument must be a dora input received through
/// [`dora_next_input`]. The input must be still valid, i.e., not
/// The `event` argument must be a dora event received through
/// [`dora_next_event`]. The event must be still valid, i.e., not
/// freed yet. The returned `out_ptr` must not be used after
/// freeing the `input`, since it points directly into the input's
/// freeing the `event`, since it points directly into the event's
/// memory.
#[no_mangle]
pub unsafe extern "C" fn read_dora_input_id(
input: *const (),
event: *const (),
out_ptr: *mut *const u8,
out_len: *mut usize,
) {
let input: &Input = unsafe { &*input.cast() };
let id = input.id.as_str().as_bytes();
let ptr = id.as_ptr();
let len = id.len();
unsafe {
*out_ptr = ptr;
*out_len = len;
let event: &Event = unsafe { &*event.cast() };
match event {
Event::Input { id, .. } => {
let id = id.as_str().as_bytes();
let ptr = id.as_ptr();
let len = id.len();
unsafe {
*out_ptr = ptr;
*out_len = len;
}
}
_ => unsafe {
*out_ptr = ptr::null();
*out_len = 0;
},
}
}

/// Reads out the data of the given input.
/// Reads out the data of the given input event.
///
/// Writes the `out_ptr` and `out_len` with the start pointer and length of the
/// input's data array. The data array is a raw byte array, whose format
/// depends on the source operator/node.
///
/// Writes a null pointer and length `0` if the given event is not an input event
/// or when an input event has no associated data.
///
/// ## Safety
///
/// The `input` argument must be a dora input received through
/// [`dora_next_input`]. The input must be still valid, i.e., not
/// The `event` argument must be a dora event received through
/// [`dora_next_event`]. The event must be still valid, i.e., not
/// freed yet. The returned `out_ptr` must not be used after
/// freeing the `input`, since it points directly into the input's
/// freeing the `event`, since it points directly into the event's
/// memory.
#[no_mangle]
pub unsafe extern "C" fn read_dora_input_data(
input: *const (),
event: *const (),
out_ptr: *mut *const u8,
out_len: *mut usize,
) {
let input: &Input = unsafe { &*input.cast() };
let data = &input.data();
let ptr = data.as_ptr();
let len = data.len();
unsafe {
*out_ptr = ptr;
*out_len = len;
let event: &Event = unsafe { &*event.cast() };
match event {
Event::Input {
data: Some(data), ..
} => {
let ptr = data.as_ptr();
let len = data.len();
unsafe {
*out_ptr = ptr;
*out_len = len;
}
}
_ => unsafe {
*out_ptr = ptr::null();
*out_len = 0;
},
}
}

/// Frees the given dora input.
/// Frees the given dora event.
///
/// ## Safety
///
/// Only pointers created through [`dora_next_input`] are allowed
/// Only pointers created through [`dora_next_event`] are allowed
/// as arguments. Each context pointer must be freed exactly once. After
/// freeing, the pointer and all derived pointers must not be used anymore.
/// This also applies to the `read_dora_input_*` functions, which return
/// pointers into the original input structure.
/// This also applies to the `read_dora_event_*` functions, which return
/// pointers into the original event structure.
#[no_mangle]
pub unsafe extern "C" fn free_dora_input(input: *mut c_void) {
let _: Box<Input> = unsafe { Box::from_raw(input.cast()) };
pub unsafe extern "C" fn free_dora_event(event: *mut c_void) {
let _: Box<Event> = unsafe { Box::from_raw(event.cast()) };
}

/// Sends the given output to subscribed dora nodes/operators.
@@ -194,7 +243,7 @@ unsafe fn try_send_output(
let data = unsafe { slice::from_raw_parts(data_ptr, data_len) };
context
.node
.send_output(&output_id, Default::default(), data.len(), |out| {
.send_output(output_id, Default::default(), data.len(), |out| {
out.copy_from_slice(data);
})
}

+3 -3 apis/c/operator/operator_api.h

@@ -18,8 +18,8 @@ extern "C"

EXPORT DoraResult_t dora_drop_operator(void *operator_context);

EXPORT OnInputResult_t dora_on_input(
const Input_t *input,
EXPORT OnEventResult_t dora_on_event(
const RawEvent_t *event,
const SendOutput_t *send_output,
void *operator_context);

@@ -27,7 +27,7 @@ extern "C"
{
DoraInitOperator_t __dora_init_operator = {.init_operator = dora_init_operator};
DoraDropOperator_t __dora_drop_operator = {.drop_operator = dora_drop_operator};
DoraOnInput_t __dora_on_input = {.on_input = dora_on_input};
DoraOnEvent_t __dora_on_event = {.on_event = dora_on_event};
}
#ifdef __cplusplus
} /* extern \"C\" */


+20 -5 apis/c/operator/operator_types.h

@@ -77,13 +77,13 @@ enum DoraStatus {
DoraStatus_t;

/** <No documentation available> */
typedef struct OnInputResult {
typedef struct OnEventResult {
/** <No documentation available> */
DoraResult_t result;

/** <No documentation available> */
DoraStatus_t status;
} OnInputResult_t;
} OnEventResult_t;

/** <No documentation available> */
typedef struct Metadata {
@@ -103,6 +103,21 @@ typedef struct Input {
Metadata_t metadata;
} Input_t;


#include <stdbool.h>

/** <No documentation available> */
typedef struct RawEvent {
/** <No documentation available> */
Input_t * input;

/** <No documentation available> */
Vec_uint8_t input_closed;

/** <No documentation available> */
bool stop;
} RawEvent_t;

/** <No documentation available> */
typedef struct Output {
/** <No documentation available> */
@@ -139,10 +154,10 @@ typedef struct SendOutput {
} SendOutput_t;

/** <No documentation available> */
typedef struct DoraOnInput {
typedef struct DoraOnEvent {
/** <No documentation available> */
OnInputResult_t (*on_input)(Input_t const *, SendOutput_t const *, void *);
} DoraOnInput_t;
OnEventResult_t (*on_event)(RawEvent_t const *, SendOutput_t const *, void *);
} DoraOnEvent_t;


#ifdef __cplusplus


+48 -24 apis/python/node/src/lib.rs

@@ -1,9 +1,8 @@
#![allow(clippy::borrow_deref_ref)] // clippy warns about code generated by #[pymethods]

use dora_node_api::{dora_core::config::NodeId, DoraNode, Input};
use dora_node_api::{DoraNode, Event, EventStream};
use dora_operator_api_python::{metadata_to_pydict, pydict_to_metadata};
use eyre::{Context, Result};
use flume::Receiver;
use pyo3::{
prelude::*,
types::{PyBytes, PyDict},
@@ -11,21 +10,53 @@ use pyo3::{

#[pyclass]
pub struct Node {
id: NodeId,
inputs: Receiver<Input>,
events: EventStream,
node: DoraNode,
}

pub struct PyInput(Input);
pub struct PyInput<'a>(Event<'a>);

impl IntoPy<PyObject> for PyInput {
impl IntoPy<PyObject> for PyInput<'_> {
fn into_py(self, py: Python) -> PyObject {
(
self.0.id.to_string(),
PyBytes::new(py, &self.0.data()),
metadata_to_pydict(self.0.metadata(), py),
)
.into_py(py)
let dict = PyDict::new(py);

let ty = match self.0 {
Event::Stop => "STOP",
Event::Input { id, metadata, data } => {
dict.set_item("id", id.to_string())
.wrap_err("failed to add input ID")
.unwrap();
dict.set_item(
"data",
PyBytes::new(py, data.as_deref().unwrap_or_default()),
)
.wrap_err("failed to add input data")
.unwrap();
dict.set_item("metadata", metadata_to_pydict(&metadata, py))
.wrap_err("failed to add input metadata")
.unwrap();
"INPUT"
}
Event::InputClosed { id } => {
dict.set_item("id", id.to_string())
.wrap_err("failed to add closed-input ID")
.unwrap();
"INPUT_CLOSED"
}
Event::Error(err) => {
dict.set_item("error", err)
.wrap_err("failed to add error")
.unwrap();
"ERROR"
}
_other => "UNKNOWN",
};

dict.set_item("type", ty)
.wrap_err("could not make type a python dictionary item")
.unwrap();

dict.into()
}
}

@@ -33,16 +64,9 @@ impl IntoPy<PyObject> for PyInput {
impl Node {
#[new]
pub fn new() -> Result<Self> {
let id = {
let raw =
std::env::var("DORA_NODE_ID").wrap_err("env variable DORA_NODE_ID must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize operator config")?
};

let mut node = DoraNode::init_from_env()?;
let inputs = node.inputs()?;
let (node, events) = DoraNode::init_from_env()?;

Ok(Node { id, inputs, node })
Ok(Node { events, node })
}

#[allow(clippy::should_implement_trait)]
@@ -51,7 +75,7 @@ impl Node {
}

pub fn __next__(&mut self) -> PyResult<Option<PyInput>> {
Ok(self.inputs.recv().ok().map(PyInput))
Ok(self.events.recv().map(PyInput))
}

fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -67,14 +91,14 @@ impl Node {
let data = data.as_bytes();
let metadata = pydict_to_metadata(metadata)?;
self.node
.send_output(&output_id.into(), metadata, data.len(), |out| {
.send_output(output_id.into(), metadata, data.len(), |out| {
out.copy_from_slice(data);
})
.wrap_err("Could not send output")
}

pub fn id(&self) -> String {
self.id.to_string()
self.node.id().to_string()
}
}



+5 -6 apis/rust/node/Cargo.toml

@@ -5,25 +5,24 @@ edition = "2021"
license = "Apache-2.0"

[features]
default = ["zenoh", "iceoryx", "tracing-subscriber"]
zenoh = ["communication-layer-pub-sub/zenoh"]
iceoryx = ["communication-layer-pub-sub/iceoryx"]
default = ["tracing-subscriber"]
tracing-subscriber = ["dep:tracing-subscriber"]

[dependencies]
dora-core = { path = "../../../libraries/core" }
shared-memory-server = { path = "../../../libraries/shared-memory-server" }
eyre = "0.6.7"
once_cell = "1.13.0"
serde = { version = "1.0.136", features = ["derive"] }
serde_yaml = "0.8.23"
serde_json = "1.0.89"
thiserror = "1.0.30"
tracing = "0.1.33"
tracing-subscriber = { version = "0.3.15", optional = true }
flume = "0.10.14"
communication-layer-pub-sub = { path = "../../../libraries/communication-layer/pub-sub", default-features = false }
uuid = { version = "1.1.2", features = ["v4"] }
capnp = "0.14.11"
dora-message = { path = "../../../libraries/message" }
dora-core = { path = "../../../libraries/core" }
bincode = "1.3.3"

[dev-dependencies]
tokio = { version = "1.24.2", features = ["rt"] }

+0 -249 apis/rust/node/src/communication.rs

@@ -1,249 +0,0 @@
use crate::BoxError;
use communication_layer_pub_sub::ReceivedSample;
pub use communication_layer_pub_sub::{CommunicationLayer, Publisher, Subscriber};
use dora_core::{
config::{CommunicationConfig, DataId, InputMapping, NodeId, OperatorId},
topics,
};
use dora_message::Metadata;
use eyre::Context;
use std::{
borrow::Cow,
collections::{BTreeMap, HashSet},
ops::Deref,
sync::Arc,
thread,
};

#[doc(hidden)]
pub const STOP_TOPIC: &str = "__dora_rs_internal__operator_stopped";

pub fn init(
communication_config: &CommunicationConfig,
) -> eyre::Result<Box<dyn CommunicationLayer>> {
match communication_config {
#[cfg(feature = "zenoh")]
CommunicationConfig::Zenoh {
config: zenoh_config,
prefix: zenoh_prefix,
} => {
let layer = communication_layer_pub_sub::zenoh::ZenohCommunicationLayer::init(
zenoh_config.deref().clone(),
zenoh_prefix.clone(),
)
.map_err(|err| eyre::eyre!(err))?;

Ok(Box::new(layer))
}
#[cfg(not(feature = "zenoh"))]
CommunicationConfig::Zenoh { .. } => {
eyre::bail!(
"cannot parse zenoh config because the compile-time `zenoh` feature \
of `dora-node-api` was disabled"
)
}
#[cfg(all(unix, feature = "iceoryx"))]
CommunicationConfig::Iceoryx {
app_name_prefix,
topic_prefix,
} => {
let app_name_prefix = app_name_prefix.clone();
let app_name = format!("{app_name_prefix}-{}", uuid::Uuid::new_v4());
let instance_name = topic_prefix.clone();
let layer = communication_layer_pub_sub::iceoryx::IceoryxCommunicationLayer::init(
app_name,
"dora".into(),
instance_name,
)
.map_err(|err| eyre::eyre!(err))?;

Ok(Box::new(layer))
}
#[cfg(not(all(unix, feature = "iceoryx")))]
CommunicationConfig::Iceoryx { .. } => {
eyre::bail!(
"cannot parse iceoryx config because the compile-time `iceoryx` feature \
of `dora-node-api` was disabled"
)
}
}
}

pub fn subscribe_all(
communication: &mut dyn CommunicationLayer,
inputs: &BTreeMap<DataId, InputMapping>,
) -> eyre::Result<flume::Receiver<Input>> {
let (inputs_tx, inputs_rx) = flume::bounded(10);
let inputs_tx = Arc::new(inputs_tx);
for (input, mapping) in inputs {
let topic = mapping.to_string();
let mut sub = communication
.subscribe(&topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to subscribe on {topic}"))?;

let input_id = input.to_owned();
let sender = inputs_tx.clone();
thread::spawn(move || loop {
let event = match sub.recv().transpose() {
None => break,
Some(Ok(sample)) => {
let mut raw: &[u8] = &sample.get();
let full_len = raw.len();
match Metadata::deserialize(&mut raw).with_context(|| {
format!("failed to deserialize metadata for `{input_id}` message")
}) {
Ok(metadata) => InputEvent::Input(Input {
id: input_id.clone(),
metadata,
data: Data {
offset: full_len - raw.len(),
sample,
},
}),
Err(err) => InputEvent::ParseMessageError(err),
}
}
Some(Err(err)) => InputEvent::Error(err),
};
match sender.send(event) {
Ok(()) => {}
Err(flume::SendError(_)) => break,
}
});
}

let mut sources: HashSet<_> = inputs
.values()
.map(|v| (v.source().to_owned(), v.operator().to_owned()))
.collect();
for (source, operator) in &sources {
let topic = match operator {
Some(operator) => format!("{source}/{operator}/{STOP_TOPIC}"),
None => format!("{source}/{STOP_TOPIC}"),
};
let mut sub = communication
.subscribe(&topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to subscribe on {topic}"))?;

let source = source.to_owned();
let operator = operator.clone();
let sender = inputs_tx.clone();
thread::spawn(move || loop {
let event = match sub.recv().transpose() {
None => break,
Some(Ok(_)) => InputEvent::SourceClosed {
source: source.clone(),
operator: operator.clone(),
},
Some(Err(err)) => InputEvent::Error(err),
};
match sender.send(event) {
Ok(()) => {}
Err(flume::SendError(_)) => break,
}
});
}

// subscribe to topic for manual stops
{
let topic = topics::MANUAL_STOP;
let mut sub = communication
.subscribe(topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to subscribe on {topic}"))?;

// only keep a weak reference to the sender because we don't want to
// prevent it from being closed (e.g. when all sources are closed)
let sender = Arc::downgrade(&inputs_tx);
std::mem::drop(inputs_tx);

thread::spawn(move || loop {
let event = match sub.recv().transpose() {
None => break,
Some(Ok(_)) => InputEvent::ManualStop,
Some(Err(err)) => InputEvent::Error(err),
};
match sender.upgrade() {
Some(sender) => match sender.send(event) {
Ok(()) => {}
Err(flume::SendError(_)) => break,
},
None => break,
}
});
}

let (combined_tx, combined) = flume::bounded(1);
thread::spawn(move || loop {
match inputs_rx.recv() {
Ok(InputEvent::Input(message)) => match combined_tx.send(message) {
Ok(()) => {}
Err(flume::SendError(_)) => break,
},
Ok(InputEvent::SourceClosed { source, operator }) => {
sources.remove(&(source, operator));
if sources.is_empty() {
break;
}
}
Ok(InputEvent::ManualStop) => {
tracing::info!("received manual stop message");
break;
}
Ok(InputEvent::ParseMessageError(err)) => {
tracing::warn!("{err:?}");
}
Ok(InputEvent::Error(err)) => panic!("{err}"),
Err(_) => break,
}
});

Ok(combined)
}

enum InputEvent {
Input(Input),
SourceClosed {
source: NodeId,
operator: Option<OperatorId>,
},
ManualStop,
Error(BoxError),
ParseMessageError(eyre::Report),
}

pub struct Input {
pub id: DataId,
pub metadata: Metadata<'static>,
pub data: Data,
}

impl Input {
pub fn data(&self) -> Cow<[u8]> {
self.data.get()
}

pub fn metadata(&self) -> &Metadata {
&self.metadata
}
}

pub struct Data {
sample: Box<dyn ReceivedSample>,
offset: usize,
}

impl Data {
fn get(&self) -> Cow<[u8]> {
match self.sample.get() {
std::borrow::Cow::Borrowed(data) => Cow::Borrowed(&data[self.offset..]),
std::borrow::Cow::Owned(mut data) => {
// TODO avoid copy caused by moving the remaining elements to the front
data.drain(..self.offset);
Cow::Owned(data)
}
}
}
}
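The zenoh/iceoryx subscription machinery above is deleted entirely; a node now talks to its local daemon through the request/reply channels defined in the new `daemon/mod.rs` below. As a hedged illustration of how the `ControlChannel` methods compose for a zero-copy output (the three-step flow follows the methods shown below, but this helper, the `MessageSample.id` field access, and the `ShmemConf` open/write calls are assumptions for sketch purposes, written against the imports at the top of the new module):

fn send_output_via_daemon(
    control: &mut ControlChannel,
    output_id: DataId,
    metadata: Metadata<'static>,
    data: &[u8],
) -> eyre::Result<()> {
    if data.is_empty() {
        // no payload: a plain control message is enough
        return control.send_empty_message(output_id, metadata);
    }
    // 1. ask the daemon to allocate a shared-memory sample for the payload
    let sample = control.prepare_message(output_id, metadata, data.len())?;
    // 2. map that region by its ID and copy the payload in (assumed shmem API)
    let mut shmem = ShmemConf::new().os_id(&sample.id).open()?;
    unsafe { shmem.as_slice_mut()[..data.len()].copy_from_slice(data) };
    // 3. hand the sample back so the daemon can route it to all subscribers
    control.send_prepared_message(sample)
}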

+470 -0 apis/rust/node/src/daemon/mod.rs

@@ -0,0 +1,470 @@
use dora_core::{
config::{DataId, NodeId},
daemon_messages::{DaemonCommunication, DaemonReply, DaemonRequest, DataflowId, NodeEvent},
message::Metadata,
};
use eyre::{bail, eyre, Context};
use flume::RecvTimeoutError;
use shared_memory_server::{Shmem, ShmemClient, ShmemConf};
use std::{marker::PhantomData, net::TcpStream, sync::Arc, time::Duration};

mod tcp;

pub(crate) struct DaemonConnection {
pub control_channel: ControlChannel,
pub event_stream: EventStream,
}

impl DaemonConnection {
pub(crate) fn init(
dataflow_id: DataflowId,
node_id: &NodeId,
daemon_communication: &DaemonCommunication,
) -> eyre::Result<Self> {
let (control, events) = match daemon_communication {
DaemonCommunication::Shmem {
daemon_control_region_id,
daemon_events_region_id,
} => {
let control = unsafe { DaemonChannel::new_shmem(daemon_control_region_id) }
.wrap_err("failed to create shmem control channel")?;
let events = unsafe { DaemonChannel::new_shmem(daemon_events_region_id) }
.wrap_err("failed to create shmem event channel")?;
(control, events)
}
DaemonCommunication::Tcp { socket_addr } => {
let control = DaemonChannel::new_tcp(
TcpStream::connect(socket_addr).wrap_err("failed to connect control stream")?,
)?;
let events = DaemonChannel::new_tcp(
TcpStream::connect(socket_addr).wrap_err("failed to connect event stream")?,
)?;
(control, events)
}
};

let mut control_channel = ControlChannel::init(dataflow_id, node_id, control)
.wrap_err("failed to init control stream")?;

let (event_stream, event_stream_thread_handle) =
EventStream::init(dataflow_id, node_id, events)
.wrap_err("failed to init event stream")?;

control_channel.event_stream_thread_handle = Some(event_stream_thread_handle);

Ok(Self {
control_channel,
event_stream,
})
}
}

pub(crate) struct ControlChannel {
channel: DaemonChannel,
event_stream_thread_handle: Option<Arc<EventStreamThreadHandle>>,
}

impl ControlChannel {
#[tracing::instrument(skip(channel))]
fn init(
dataflow_id: DataflowId,
node_id: &NodeId,
mut channel: DaemonChannel,
) -> eyre::Result<Self> {
register(dataflow_id, node_id.clone(), &mut channel)?;

Ok(Self {
channel,
event_stream_thread_handle: None,
})
}

pub fn report_stop(&mut self) -> eyre::Result<()> {
let reply = self
.channel
.request(&DaemonRequest::Stopped)
.wrap_err("failed to report stopped to dora-daemon")?;
match reply {
dora_core::daemon_messages::DaemonReply::Result(result) => result
.map_err(|e| eyre!(e))
.wrap_err("failed to report stop event to dora-daemon")?,
other => bail!("unexpected stopped reply: {other:?}"),
}
Ok(())
}

pub fn report_closed_outputs(&mut self, outputs: Vec<DataId>) -> eyre::Result<()> {
let reply = self
.channel
.request(&DaemonRequest::CloseOutputs(outputs))
.wrap_err("failed to report closed outputs to dora-daemon")?;
match reply {
dora_core::daemon_messages::DaemonReply::Result(result) => result
.map_err(|e| eyre!(e))
.wrap_err("failed to receive closed outputs reply from dora-daemon")?,
other => bail!("unexpected closed outputs reply: {other:?}"),
}
Ok(())
}

pub fn prepare_message(
&mut self,
output_id: DataId,
metadata: Metadata<'static>,
data_len: usize,
) -> eyre::Result<MessageSample> {
let reply = self
.channel
.request(&DaemonRequest::PrepareOutputMessage {
output_id,
metadata,
data_len,
})
.wrap_err("failed to send PrepareOutputMessage request to dora-daemon")?;
match reply {
dora_core::daemon_messages::DaemonReply::PreparedMessage {
shared_memory_id: id,
} => Ok(MessageSample { id }),
dora_core::daemon_messages::DaemonReply::Result(Err(err)) => {
Err(eyre!(err).wrap_err("failed to prepare output message"))
}
other => bail!("unexpected PrepareOutputMessage reply: {other:?}"),
}
}

pub fn send_prepared_message(&mut self, sample: MessageSample) -> eyre::Result<()> {
let reply = self
.channel
.request(&DaemonRequest::SendPreparedMessage { id: sample.id })
.wrap_err("failed to send SendOutMessage request to dora-daemon")?;
match reply {
dora_core::daemon_messages::DaemonReply::Result(result) => {
result.map_err(|err| eyre!(err))
}
other => bail!("unexpected SendOutMessage reply: {other:?}"),
}
}

pub fn send_empty_message(
&mut self,
output_id: DataId,
metadata: Metadata<'static>,
) -> eyre::Result<()> {
let reply = self
.channel
.request(&DaemonRequest::SendEmptyMessage {
output_id,
metadata,
})
.wrap_err("failed to send SendEmptyMessage request to dora-daemon")?;
match reply {
dora_core::daemon_messages::DaemonReply::Result(result) => {
result.map_err(|err| eyre!(err))
}
other => bail!("unexpected SendEmptyMessage reply: {other:?}"),
}
}
}

enum DaemonChannel {
Shmem(ShmemClient<DaemonRequest, DaemonReply>),
Tcp(TcpStream),
}

impl DaemonChannel {
#[tracing::instrument]
fn new_tcp(stream: TcpStream) -> eyre::Result<Self> {
stream.set_nodelay(true).context("failed to set nodelay")?;
Ok(DaemonChannel::Tcp(stream))
}

#[tracing::instrument]
unsafe fn new_shmem(daemon_control_region_id: &str) -> eyre::Result<Self> {
let daemon_events_region = ShmemConf::new()
.os_id(daemon_control_region_id)
.open()
.wrap_err("failed to connect to dora-daemon")?;
let channel = DaemonChannel::Shmem(
unsafe { ShmemClient::new(daemon_events_region, Some(Duration::from_secs(5))) }
.wrap_err("failed to create ShmemChannel")?,
);
Ok(channel)
}

fn request(&mut self, request: &DaemonRequest) -> eyre::Result<DaemonReply> {
match self {
DaemonChannel::Shmem(client) => client.request(request),
DaemonChannel::Tcp(stream) => tcp::request(stream, request),
}
}
}

fn register(
dataflow_id: DataflowId,
node_id: NodeId,
channel: &mut DaemonChannel,
) -> eyre::Result<()> {
let msg = DaemonRequest::Register {
dataflow_id,
node_id,
};
let reply = channel
.request(&msg)
.wrap_err("failed to send register request to dora-daemon")?;

match reply {
dora_core::daemon_messages::DaemonReply::Result(result) => result
.map_err(|e| eyre!(e))
.wrap_err("failed to register node with dora-daemon")?,
other => bail!("unexpected register reply: {other:?}"),
}
Ok(())
}

enum EventItem {
NodeEvent {
event: NodeEvent,
ack_channel: std::sync::mpsc::Sender<()>,
},
FatalError(eyre::Report),
}

pub struct EventStream {
receiver: flume::Receiver<EventItem>,
_thread_handle: Arc<EventStreamThreadHandle>,
}

impl EventStream {
fn init(
dataflow_id: DataflowId,
node_id: &NodeId,
mut channel: DaemonChannel,
) -> eyre::Result<(Self, Arc<EventStreamThreadHandle>)> {
register(dataflow_id, node_id.clone(), &mut channel)?;

channel
.request(&DaemonRequest::Subscribe)
.map_err(|e| eyre!(e))
.wrap_err("failed to create subscription with dora-daemon")?;

let (tx, rx) = flume::bounded(0);
let mut drop_tokens = Vec::new();
let node_id = node_id.clone();
let join_handle = std::thread::spawn(move || {
let result = loop {
let daemon_request = DaemonRequest::NextEvent {
drop_tokens: std::mem::take(&mut drop_tokens),
};
let event: NodeEvent = match channel.request(&daemon_request) {
Ok(DaemonReply::NodeEvent(event)) => event,
Ok(DaemonReply::Closed) => {
tracing::debug!("Event stream closed for node ID `{node_id}`");
break Ok(());
}
Ok(other) => {
let err = eyre!("unexpected control reply: {other:?}");
tracing::warn!("{err:?}");
continue;
}
Err(err) => {
let err = eyre!(err).wrap_err("failed to receive incoming event");
tracing::warn!("{err:?}");
continue;
}
};
let drop_token = match &event {
NodeEvent::Input {
data: Some(data), ..
} => Some(data.drop_token.clone()),
NodeEvent::Stop
| NodeEvent::InputClosed { .. }
| NodeEvent::Input { data: None, .. } => None,
};

let (drop_tx, drop_rx) = std::sync::mpsc::channel();
match tx.send(EventItem::NodeEvent {
event,
ack_channel: drop_tx,
}) {
Ok(()) => {}
Err(_) => {
// receiving end of channel was closed
break Ok(());
}
}

match drop_rx.recv_timeout(Duration::from_secs(30)) {
Ok(()) => break Err(eyre!("Node API should not send anything on ACK channel")),
Err(std::sync::mpsc::RecvTimeoutError::Timeout) => {
tracing::warn!("timeout while waiting for input ACK");
}
Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {} // expected result
}

if let Some(token) = drop_token {
drop_tokens.push(token);
}
};
if let Err(err) = result {
if let Err(flume::SendError(item)) = tx.send(EventItem::FatalError(err)) {
let err = match item {
EventItem::FatalError(err) => err,
_ => unreachable!(),
};
tracing::error!("failed to report fatal EventStream error: {err:?}");
}
}
});

let thread_handle = EventStreamThreadHandle::new(join_handle);

Ok((
EventStream {
receiver: rx,
_thread_handle: thread_handle.clone(),
},
thread_handle,
))
}

pub fn recv(&mut self) -> Option<Event> {
let event = self.receiver.recv();
self.recv_common(event)
}

pub async fn recv_async(&mut self) -> Option<Event> {
let event = self.receiver.recv_async().await;
self.recv_common(event)
}

fn recv_common(&mut self, event: Result<EventItem, flume::RecvError>) -> Option<Event> {
let event = match event {
Ok(event) => event,
Err(flume::RecvError::Disconnected) => {
tracing::info!("event channel disconnected");
return None;
}
};
let event = match event {
EventItem::NodeEvent { event, ack_channel } => match event {
NodeEvent::Stop => Event::Stop,
NodeEvent::InputClosed { id } => Event::InputClosed { id },
NodeEvent::Input { id, metadata, data } => {
let mapped = data
.map(|d| unsafe { MappedInputData::map(&d.shared_memory_id, d.len) })
.transpose();
match mapped {
Ok(mapped) => Event::Input {
id,
metadata,
data: mapped.map(|data| Data {
data,
_drop: ack_channel,
}),
},
Err(err) => Event::Error(format!("{err:?}")),
}
}
},
EventItem::FatalError(err) => {
Event::Error(format!("fatal event stream error: {err:?}"))
}
};

Some(event)
}
}

pub struct MessageSample {
pub id: String,
}

#[derive(Debug)]
#[non_exhaustive]
pub enum Event<'a> {
Stop,
Input {
id: DataId,
metadata: Metadata<'static>,
data: Option<Data<'a>>,
},
InputClosed {
id: DataId,
},
Error(String),
}

pub struct Data<'a> {
data: MappedInputData<'a>,
_drop: std::sync::mpsc::Sender<()>,
}

impl std::ops::Deref for Data<'_> {
type Target = [u8];

fn deref(&self) -> &Self::Target {
&self.data
}
}

impl std::fmt::Debug for Data<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Data").finish_non_exhaustive()
}
}

pub struct MappedInputData<'a> {
memory: Shmem,
len: usize,
_data: PhantomData<&'a [u8]>,
}

impl MappedInputData<'_> {
unsafe fn map(shared_memory_id: &str, len: usize) -> eyre::Result<Self> {
let memory = ShmemConf::new()
.os_id(shared_memory_id)
.open()
.wrap_err("failed to map shared memory input")?;
Ok(MappedInputData {
memory,
len,
_data: PhantomData,
})
}
}

impl std::ops::Deref for MappedInputData<'_> {
type Target = [u8];

fn deref(&self) -> &Self::Target {
unsafe { &self.memory.as_slice()[..self.len] }
}
}

struct EventStreamThreadHandle(flume::Receiver<std::thread::Result<()>>);
impl EventStreamThreadHandle {
fn new(join_handle: std::thread::JoinHandle<()>) -> Arc<Self> {
let (tx, rx) = flume::bounded(1);
std::thread::spawn(move || {
let _ = tx.send(join_handle.join());
});
Arc::new(Self(rx))
}
}

impl Drop for EventStreamThreadHandle {
fn drop(&mut self) {
match self.0.recv_timeout(Duration::from_secs(2)) {
Ok(Ok(())) => {}
Ok(Err(_)) => {
tracing::error!("event stream thread panicked");
}
Err(RecvTimeoutError::Timeout) => {
tracing::warn!("timeout while waiting for event stream thread");
}
Err(RecvTimeoutError::Disconnected) => {
tracing::warn!("event stream thread result channel closed unexpectedly");
}
}
}
}
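
Note how the event thread above turns a channel disconnect into an acknowledgement: each delivered input carries an `ack_channel` sender that the node API never sends on, so dropping the `Data` handle (and with it the sender) is what signals that the shared-memory buffer may be reclaimed; the corresponding drop token is then reported back to the daemon on the next `NextEvent` request. A self-contained sketch of this disconnect-as-ACK idiom using only `std`; the `Sample` type is illustrative, not part of the dora API:

use std::sync::mpsc;
use std::thread;
use std::time::Duration;

// Illustrative stand-in for a mapped input sample.
struct Sample {
    data: Vec<u8>,
    _drop: mpsc::Sender<()>, // never used for sending; dropping it is the ACK
}

fn main() {
    let (ack_tx, ack_rx) = mpsc::channel();
    let sample = Sample {
        data: vec![1, 2, 3],
        _drop: ack_tx,
    };

    // Consumer side: the sample (and its sender) is dropped when processing ends.
    let consumer = thread::spawn(move || {
        println!("processing {} bytes", sample.data.len());
    });

    // Producer side: a Disconnected error is the expected acknowledgement.
    match ack_rx.recv_timeout(Duration::from_secs(30)) {
        Ok(()) => eprintln!("unexpected message on ACK channel"),
        Err(mpsc::RecvTimeoutError::Timeout) => eprintln!("consumer still holds the sample"),
        Err(mpsc::RecvTimeoutError::Disconnected) => {
            println!("sample dropped; the buffer can be reclaimed")
        }
    }

    consumer.join().unwrap();
}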

+ 58 - 0 apis/rust/node/src/daemon/tcp.rs

@@ -0,0 +1,58 @@
use dora_core::daemon_messages::{DaemonReply, DaemonRequest};
use eyre::{eyre, Context};
use std::{
io::{Read, Write},
net::TcpStream,
};

pub fn request(connection: &mut TcpStream, request: &DaemonRequest) -> eyre::Result<DaemonReply> {
send_message(connection, request)?;
receive_reply(connection)
.and_then(|reply| reply.ok_or_else(|| eyre!("server disconnected unexpectedly")))
}

fn send_message(connection: &mut TcpStream, message: &DaemonRequest) -> eyre::Result<()> {
let serialized = bincode::serialize(&message).wrap_err("failed to serialize DaemonRequest")?;
tcp_send(connection, &serialized).wrap_err("failed to send DaemonRequest")?;
Ok(())
}

fn receive_reply(connection: &mut TcpStream) -> eyre::Result<Option<DaemonReply>> {
let raw = match tcp_receive(connection) {
Ok(raw) => raw,
Err(err) => match err.kind() {
std::io::ErrorKind::UnexpectedEof | std::io::ErrorKind::ConnectionAborted => {
return Ok(None)
}
other => {
return Err(err).with_context(|| {
format!(
"unexpected I/O error (kind {other:?}) while trying to receive DaemonReply"
)
})
}
},
};
bincode::deserialize(&raw)
.wrap_err("failed to deserialize DaemonReply")
.map(Some)
}

fn tcp_send(connection: &mut (impl Write + Unpin), message: &[u8]) -> std::io::Result<()> {
let len_raw = (message.len() as u64).to_le_bytes();
connection.write_all(&len_raw)?;
connection.write_all(message)?;
connection.flush()?;
Ok(())
}

fn tcp_receive(connection: &mut (impl Read + Unpin)) -> std::io::Result<Vec<u8>> {
let reply_len = {
let mut raw = [0; 8];
connection.read_exact(&mut raw)?;
u64::from_le_bytes(raw) as usize
};
let mut reply = vec![0; reply_len];
connection.read_exact(&mut reply)?;
Ok(reply)
}
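
The wire format here is deliberately simple: a little-endian `u64` length prefix followed by a bincode-encoded payload. A self-contained sketch of the same framing doing a loopback round trip; it uses a raw byte payload instead of a serialized `DaemonRequest` so that it runs without the bincode/serde dependencies:

use std::io::{Read, Write};
use std::net::{TcpListener, TcpStream};
use std::thread;

// Same framing as above: little-endian u64 length prefix, then the payload.
fn send_frame(conn: &mut TcpStream, payload: &[u8]) -> std::io::Result<()> {
    conn.write_all(&(payload.len() as u64).to_le_bytes())?;
    conn.write_all(payload)?;
    conn.flush()
}

fn recv_frame(conn: &mut TcpStream) -> std::io::Result<Vec<u8>> {
    let mut len_raw = [0; 8];
    conn.read_exact(&mut len_raw)?;
    let mut buf = vec![0; u64::from_le_bytes(len_raw) as usize];
    conn.read_exact(&mut buf)?;
    Ok(buf)
}

fn main() -> std::io::Result<()> {
    let listener = TcpListener::bind("127.0.0.1:0")?;
    let addr = listener.local_addr()?;

    // Echo server: receive one frame and send it back unchanged.
    let server = thread::spawn(move || -> std::io::Result<()> {
        let (mut conn, _) = listener.accept()?;
        let frame = recv_frame(&mut conn)?;
        send_frame(&mut conn, &frame)
    });

    let mut client = TcpStream::connect(addr)?;
    send_frame(&mut client, b"hello daemon")?;
    assert_eq!(recv_frame(&mut client)?, b"hello daemon");
    server.join().unwrap()
}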

+ 79 - 115 apis/rust/node/src/lib.rs

@@ -1,65 +1,63 @@
pub use communication::Input;
use communication::STOP_TOPIC;
use communication_layer_pub_sub::{CommunicationLayer, Publisher};
use daemon::{ControlChannel, DaemonConnection};
pub use daemon::{Event, EventStream};
pub use dora_core;
use dora_core::config::{CommunicationConfig, DataId, NodeId, NodeRunConfig};
pub use dora_message::{uhlc, Metadata, MetadataParameters};
pub use dora_core::message::{uhlc, Metadata, MetadataParameters};
use dora_core::{
config::{DataId, NodeId, NodeRunConfig},
daemon_messages::NodeConfig,
};
use eyre::WrapErr;
pub use flume::Receiver;
use shared_memory_server::ShmemConf;

pub mod communication;
mod daemon;

pub struct DoraNode {
id: NodeId,
node_config: NodeRunConfig,
communication: Box<dyn CommunicationLayer>,
control_channel: ControlChannel,
hlc: uhlc::HLC,
}

impl DoraNode {
pub fn init_from_env() -> eyre::Result<Self> {
pub fn init_from_env() -> eyre::Result<(Self, EventStream)> {
#[cfg(feature = "tracing-subscriber")]
set_up_tracing().context("failed to set up tracing subscriber")?;

let id = {
let raw =
std::env::var("DORA_NODE_ID").wrap_err("env variable DORA_NODE_ID must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize operator config")?
};
let node_config = {
let raw = std::env::var("DORA_NODE_RUN_CONFIG")
.wrap_err("env variable DORA_NODE_RUN_CONFIG must be set")?;
let raw = std::env::var("DORA_NODE_CONFIG")
.wrap_err("env variable DORA_NODE_CONFIG must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize operator config")?
};
let communication_config = {
let raw = std::env::var("DORA_COMMUNICATION_CONFIG")
.wrap_err("env variable DORA_COMMUNICATION_CONFIG must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize communication config")?
};
Self::init(id, node_config, communication_config)
Self::init(node_config)
}

pub fn init(
id: NodeId,
node_config: NodeRunConfig,
communication_config: CommunicationConfig,
) -> eyre::Result<Self> {
let communication = communication::init(&communication_config)?;
Ok(Self {
id,
node_config,
communication,
pub fn init(node_config: NodeConfig) -> eyre::Result<(Self, EventStream)> {
let NodeConfig {
dataflow_id,
node_id,
run_config,
daemon_communication,
} = node_config;

let DaemonConnection {
control_channel,
event_stream,
} = DaemonConnection::init(dataflow_id, &node_id, &daemon_communication)
.wrap_err("failed to connect to dora-daemon")?;

let node = Self {
id: node_id,
node_config: run_config,
control_channel,
hlc: uhlc::HLC::default(),
})
}

pub fn inputs(&mut self) -> eyre::Result<flume::Receiver<Input>> {
communication::subscribe_all(self.communication.as_mut(), &self.node_config.inputs)
};
Ok((node, event_stream))
}

pub fn send_output<F>(
&mut self,
output_id: &DataId,
output_id: DataId,
parameters: MetadataParameters,
data_len: usize,
data: F,
@@ -67,33 +65,49 @@ impl DoraNode {
where
F: FnOnce(&mut [u8]),
{
if !self.node_config.outputs.contains(output_id) {
if !self.node_config.outputs.contains(&output_id) {
eyre::bail!("unknown output");
}
let metadata = Metadata::from_parameters(self.hlc.new_timestamp(), parameters);
let serialized_metadata = metadata
.serialize()
.with_context(|| format!("failed to serialize `{}` message", output_id))?;
let full_len = serialized_metadata.len() + data_len;

let self_id = &self.id;
let topic = format!("{self_id}/{output_id}");
let publisher = self
.communication
.publisher(&topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed create publisher for output {output_id}"))?;

let mut sample = publisher
.prepare(full_len)
.map_err(|err| eyre::eyre!(err))?;
let raw = sample.as_mut_slice();
raw[..serialized_metadata.len()].copy_from_slice(&serialized_metadata);
data(&mut raw[serialized_metadata.len()..]);
sample
.publish()
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to send data for output {output_id}"))?;
let metadata = Metadata::from_parameters(self.hlc.new_timestamp(), parameters.into_owned());

if data_len > 0 {
let sample = self
.control_channel
.prepare_message(output_id.clone(), metadata, data_len)
.wrap_err("failed to prepare sample for output message")?;
// map shared memory and fill in data
let mut shared_memory = ShmemConf::new()
.os_id(&sample.id)
.open()
.wrap_err("failed to open shared memory sample")?;

let raw = unsafe { shared_memory.as_slice_mut() };
data(&mut raw[..data_len]);

self.control_channel
.send_prepared_message(sample)
.wrap_err_with(|| format!("failed to send data for output {output_id}"))?;
} else {
data(&mut []);
self.control_channel
.send_empty_message(output_id.clone(), metadata)
.wrap_err_with(|| format!("failed to send output {output_id}"))?;
}

Ok(())
}

pub fn close_outputs(&mut self, outputs: Vec<DataId>) -> eyre::Result<()> {
for output_id in &outputs {
if !self.node_config.outputs.remove(output_id) {
eyre::bail!("unknown output {output_id}");
}
}

self.control_channel
.report_closed_outputs(outputs)
.wrap_err("failed to report closed outputs to daemon")?;

Ok(())
}

@@ -109,25 +123,9 @@ impl DoraNode {
impl Drop for DoraNode {
#[tracing::instrument(skip(self), fields(self.id = %self.id))]
fn drop(&mut self) {
let self_id = &self.id;
let topic = format!("{self_id}/{STOP_TOPIC}");
let result = self
.communication
.publisher(&topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| {
format!("failed to create publisher for stop message for node `{self_id}`")
})
.and_then(|p| {
p.publish(&[])
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to send stop message for node `{self_id}`"))
});
match result {
Ok(()) => tracing::info!("sent stop message for {self_id}"),
Err(err) => {
tracing::error!("{err:?}")
}
tracing::info!("reporting node stop for node `{}`", self.id);
if let Err(err) = self.control_channel.report_stop() {
tracing::error!("{err:?}")
}
}
}
@@ -143,37 +141,3 @@ fn set_up_tracing() -> eyre::Result<()> {
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

pub fn manual_stop_publisher(
communication: &mut dyn CommunicationLayer,
) -> eyre::Result<Box<dyn Publisher>> {
let publisher = communication
.publisher(dora_core::topics::MANUAL_STOP)
.map_err(|err| eyre::eyre!(err))?;
Ok(publisher)
}

#[cfg(test)]
mod tests {
use dora_core::config;

use super::*;

#[test]
fn no_op_operator() {
let id = uuid::Uuid::new_v4().to_string().into();
let node_config = config::NodeRunConfig {
inputs: Default::default(),
outputs: Default::default(),
};
let communication_config = config::CommunicationConfig::Zenoh {
config: Default::default(),
prefix: format!("/{}", uuid::Uuid::new_v4()),
};

let mut node = DoraNode::init(id, node_config, communication_config).unwrap();

let inputs = node.inputs().unwrap();
assert!(inputs.recv().is_err());
}
}
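
With the communication layer gone, a node now receives everything through the `EventStream` returned by `DoraNode::init_from_env` and publishes via `send_output`, which fills a daemon-provided buffer in place. A rough usage sketch under these assumptions: the `echo` output is hypothetical and would need to be declared for this node in the dataflow YAML, default metadata parameters are used (assuming `MetadataParameters: Default`), and the `eyre` crate is available, as in the node API itself:

use dora_node_api::dora_core::config::DataId;
use dora_node_api::{DoraNode, Event};

fn main() -> eyre::Result<()> {
    let (mut node, mut events) = DoraNode::init_from_env()?;

    while let Some(event) = events.recv() {
        match event {
            Event::Input {
                id,
                metadata: _,
                data,
            } => {
                let bytes = data.as_deref().unwrap_or(&[]).to_vec();
                // `echo` is hypothetical; it must be listed as an output of
                // this node in the dataflow YAML.
                node.send_output(
                    DataId::from("echo".to_owned()),
                    Default::default(),
                    bytes.len(),
                    |buf| buf.copy_from_slice(&bytes),
                )?;
                eprintln!("echoed input `{id}`");
            }
            Event::InputClosed { id } => eprintln!("input `{id}` closed"),
            Event::Stop => break,
            other => eprintln!("ignoring event: {other:?}"),
        }
    }
    Ok(())
}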

+ 9 - 9 apis/rust/operator/macros/src/lib.rs

@@ -49,26 +49,26 @@ fn register_operator_impl(item: &TokenStream2) -> syn::Result<TokenStream2> {
};
};

let on_input = quote! {
let on_event = quote! {
#[no_mangle]
pub unsafe extern "C" fn dora_on_input(
input: &dora_operator_api::types::Input,
pub unsafe extern "C" fn dora_on_event(
event: &dora_operator_api::types::RawEvent,
send_output: &dora_operator_api::types::SendOutput,
operator_context: *mut std::ffi::c_void,
) -> dora_operator_api::types::OnInputResult {
dora_operator_api::raw::dora_on_input::<#operator_ty>(
input, send_output, operator_context
) -> dora_operator_api::types::OnEventResult {
dora_operator_api::raw::dora_on_event::<#operator_ty>(
event, send_output, operator_context
)
}

const _DORA_ON_INPUT: dora_operator_api::types::DoraOnInput = dora_operator_api::types::DoraOnInput {
on_input: dora_operator_api::types::OnInputFn(dora_on_input),
const _DORA_ON_EVENT: dora_operator_api::types::DoraOnEvent = dora_operator_api::types::DoraOnEvent {
on_event: dora_operator_api::types::OnEventFn(dora_on_event),
};
};

Ok(quote! {
#init
#drop
#on_input
#on_event
})
}

+ 10 - 3 apis/rust/operator/src/lib.rs

@@ -8,12 +8,19 @@ use types::{Metadata, Output, SendOutput};

pub mod raw;

#[derive(Debug)]
#[non_exhaustive]
pub enum Event<'a> {
Input { id: &'a str, data: &'a [u8] },
InputClosed { id: &'a str },
Stop,
}

pub trait DoraOperator: Default {
#[allow(clippy::result_unit_err)] // we use a () error type only for testing
fn on_input(
fn on_event(
&mut self,
id: &str,
data: &[u8],
event: &Event,
output_sender: &mut DoraOutputSender,
) -> Result<DoraStatus, String>;
}
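
Under the revised trait, a single `on_event` entry point replaces `on_input`, so an operator can react to input closure and stop requests as well. A minimal sketch of an implementor; the operator name is illustrative, and the early exit assumes a `DoraStatus::Stop` variant alongside `Continue`:

use dora_operator_api::{register_operator, DoraOperator, DoraOutputSender, DoraStatus, Event};

register_operator!(CountingOperator);

#[derive(Default)]
struct CountingOperator {
    seen: usize,
}

impl DoraOperator for CountingOperator {
    fn on_event(
        &mut self,
        event: &Event,
        _output_sender: &mut DoraOutputSender,
    ) -> Result<DoraStatus, String> {
        match event {
            Event::Input { id, data } => {
                self.seen += 1;
                eprintln!("input `{id}`: {} bytes (#{})", data.len(), self.seen);
            }
            Event::InputClosed { id } => eprintln!("input `{id}` closed"),
            Event::Stop => return Ok(DoraStatus::Stop),
            _ => {} // `Event` is non_exhaustive; ignore unknown variants
        }
        Ok(DoraStatus::Continue)
    }
}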


+ 26 - 9 apis/rust/operator/src/raw.rs

@@ -1,5 +1,5 @@
use crate::{DoraOperator, DoraOutputSender, DoraStatus};
use dora_operator_api_types::{DoraInitResult, DoraResult, Input, OnInputResult, SendOutput};
use crate::{DoraOperator, DoraOutputSender, DoraStatus, Event};
use dora_operator_api_types::{DoraInitResult, DoraResult, OnEventResult, RawEvent, SendOutput};
use std::ffi::c_void;

pub type OutputFnRaw = unsafe extern "C" fn(
@@ -26,21 +26,38 @@ pub unsafe fn dora_drop_operator<O>(operator_context: *mut c_void) -> DoraResult
DoraResult { error: None }
}

pub unsafe fn dora_on_input<O: DoraOperator>(
input: &Input,
pub unsafe fn dora_on_event<O: DoraOperator>(
event: &RawEvent,
send_output: &SendOutput,
operator_context: *mut std::ffi::c_void,
) -> OnInputResult {
) -> OnEventResult {
let mut output_sender = DoraOutputSender(send_output);

let operator: &mut O = unsafe { &mut *operator_context.cast() };
let data = input.data.as_ref().as_slice();
match operator.on_input(&input.id, data, &mut output_sender) {
Ok(status) => OnInputResult {

let event_variant = if let Some(input) = &event.input {
let data = input.data.as_ref().as_slice();
Event::Input {
id: &input.id,
data,
}
} else if let Some(input_id) = &event.input_closed {
Event::InputClosed { id: input_id }
} else if event.stop {
Event::Stop
} else {
// ignore unknown events
return OnEventResult {
result: DoraResult { error: None },
status: DoraStatus::Continue,
};
};
match operator.on_event(&event_variant, &mut output_sender) {
Ok(status) => OnEventResult {
result: DoraResult { error: None },
status,
},
Err(error) => OnInputResult {
Err(error) => OnEventResult {
result: DoraResult {
error: Some(error.into()),
},


+ 16 - 6 apis/rust/operator/types/src/lib.rs

@@ -37,21 +37,31 @@ pub struct DoraResult {
#[derive_ReprC]
#[ffi_export]
#[repr(C)]
pub struct DoraOnInput {
pub on_input: OnInputFn,
pub struct DoraOnEvent {
pub on_event: OnEventFn,
}

#[derive_ReprC]
#[ffi_export]
#[repr(transparent)]
pub struct OnInputFn(
pub struct OnEventFn(
pub unsafe extern "C" fn(
input: &Input,
event: &RawEvent,
send_output: &SendOutput,
operator_context: *mut std::ffi::c_void,
) -> OnInputResult,
) -> OnEventResult,
);

#[derive_ReprC]
#[ffi_export]
#[repr(C)]
#[derive(Debug)]
pub struct RawEvent {
pub input: Option<safer_ffi::boxed::Box<Input>>,
pub input_closed: Option<safer_ffi::String>,
pub stop: bool,
}

#[derive_ReprC]
#[ffi_export]
#[repr(C)]
@@ -91,7 +101,7 @@ pub struct Output {
#[ffi_export]
#[repr(C)]
#[derive(Debug)]
pub struct OnInputResult {
pub struct OnEventResult {
pub result: DoraResult,
pub status: DoraStatus,
}


+ 0 - 1 binaries/cli/Cargo.toml

@@ -21,6 +21,5 @@ serde_json = "1.0.86"
termcolor = "1.1.3"
atty = "0.2.14"
uuid = { version = "1.2.1", features = ["v4", "serde"] }
sysinfo = "0.26.6"
inquire = "0.5.2"
communication-layer-request-reply = { path = "../../libraries/communication-layer/request-reply" }

+ 60 - 55 binaries/cli/src/check.rs

@@ -1,12 +1,12 @@
use crate::{control_connection, graph::read_descriptor};
use dora_core::{
adjust_shared_library_path,
config::{InputMapping, UserInputMapping},
config::{DataId, InputMapping, OperatorId, UserInputMapping},
descriptor::{self, source_is_url, CoreNodeKind, OperatorSource},
topics::{ControlRequest, ControlRequestReply},
};
use eyre::{bail, eyre, Context};
use std::{env::consts::EXE_EXTENSION, io::Write, path::Path};
use sysinfo::SystemExt;
use termcolor::{Color, ColorChoice, ColorSpec, WriteColor};

pub fn check_environment() -> eyre::Result<()> {
@@ -20,7 +20,6 @@ pub fn check_environment() -> eyre::Result<()> {
let mut stdout = termcolor::StandardStream::stdout(color_choice);

// check whether coordinator is running

write!(stdout, "Dora Coordinator: ")?;
if coordinator_running()? {
let _ = stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green)));
@@ -32,20 +31,18 @@ pub fn check_environment() -> eyre::Result<()> {
}
let _ = stdout.reset();

// check whether roudi is running
write!(stdout, "Iceoryx Daemon: ")?;
let system = sysinfo::System::new_all();
match system.processes_by_exact_name("iox-roudi").next() {
Some(_) => {
let _ = stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green)));
writeln!(stdout, "ok")?;
}
None => {
let _ = stdout.set_color(ColorSpec::new().set_fg(Some(Color::Red)));
writeln!(stdout, "not running")?;
error_occured = true;
}
// check whether daemon is running
write!(stdout, "Dora Daemon: ")?;
if daemon_running()? {
let _ = stdout.set_color(ColorSpec::new().set_fg(Some(Color::Green)));
writeln!(stdout, "ok")?;
} else {
let _ = stdout.set_color(ColorSpec::new().set_fg(Some(Color::Red)));
writeln!(stdout, "not running")?;
error_occured = true;
}
let _ = stdout.reset();

writeln!(stdout)?;

if error_occured {
@@ -61,6 +58,28 @@ pub fn coordinator_running() -> Result<bool, eyre::ErrReport> {
Ok(connected)
}

pub fn daemon_running() -> Result<bool, eyre::ErrReport> {
let mut control_session = None;
let running = match control_connection(&mut control_session) {
Ok(connection) => {
let reply_raw = connection
.request(&serde_json::to_vec(&ControlRequest::DaemonConnected).unwrap())
.wrap_err("failed to send DaemonConnected message")?;

let reply = serde_json::from_slice(&reply_raw).wrap_err("failed to parse reply")?;
match reply {
ControlRequestReply::DaemonConnected(running) => running,
other => bail!("unexpected reply to daemon connection check: {other:?}"),
}
}
Err(_) => {
// coordinator is not running
false
}
};
Ok(running)
}

pub fn check_dataflow(dataflow_path: &Path, runtime: Option<&Path>) -> eyre::Result<()> {
let descriptor = read_descriptor(dataflow_path).wrap_err_with(|| {
format!(
@@ -177,55 +196,41 @@ fn check_input(
) -> Result<(), eyre::ErrReport> {
match mapping {
InputMapping::Timer { interval: _ } => {}
InputMapping::User(UserInputMapping {
source,
operator,
output,
}) => {
InputMapping::User(UserInputMapping { source, output }) => {
let source_node = nodes.iter().find(|n| &n.id == source).ok_or_else(|| {
eyre!("source node `{source}` mapped to input `{input_id_str}` does not exist",)
})?;
if let Some(operator_id) = operator {
let operator = match &source_node.kind {
CoreNodeKind::Runtime(runtime) => {
let operator = runtime.operators.iter().find(|o| &o.id == operator_id);
operator.ok_or_else(|| {
match &source_node.kind {
CoreNodeKind::Custom(custom_node) => {
if !custom_node.run_config.outputs.contains(output) {
bail!(
"output `{source}/{output}` mapped to \
input `{input_id_str}` does not exist",
);
}
}
CoreNodeKind::Runtime(runtime) => {
let (operator_id, output) = output.split_once('/').unwrap_or_default();
let operator_id = OperatorId::from(operator_id.to_owned());
let output = DataId::from(output.to_owned());

let operator = runtime
.operators
.iter()
.find(|o| o.id == operator_id)
.ok_or_else(|| {
eyre!(
"source operator `{source}/{operator_id}` used \
for input `{input_id_str}` does not exist",
)
})?
}
CoreNodeKind::Custom(_) => {
})?;
if !operator.config.outputs.contains(&output) {
bail!(
"input `{input_id_str}` references operator \
`{source}/{operator_id}`, but `{source}` is a \
custom node",
"output `{source}/{operator_id}/{output}` mapped to \
input `{input_id_str}` does not exist",
);
}
};

if !operator.config.outputs.contains(output) {
bail!(
"output `{source}/{operator_id}/{output}` mapped to \
input `{input_id_str}` does not exist",
);
}
} else {
match &source_node.kind {
CoreNodeKind::Runtime(_) => bail!(
"input `{input_id_str}` references output \
`{source}/{output}`, but `{source}` is a \
runtime node",
),
CoreNodeKind::Custom(custom_node) => {
if !custom_node.run_config.outputs.contains(output) {
bail!(
"output `{source}/{output}` mapped to \
input `{input_id_str}` does not exist",
);
}
}
}
}
}


+ 21 - 20 binaries/cli/src/main.rs

@@ -1,9 +1,6 @@
use clap::Parser;
use communication_layer_request_reply::{RequestReplyLayer, TcpLayer, TcpRequestReplyConnection};
use dora_core::topics::{
control_socket_addr, ControlRequest, DataflowId, ListDataflowResult, StartDataflowResult,
StopDataflowResult,
};
use dora_core::topics::{control_socket_addr, ControlRequest, ControlRequestReply, DataflowId};
use eyre::{bail, Context};
use std::path::PathBuf;
use uuid::Uuid;
@@ -48,9 +45,9 @@ enum Command {
#[clap(long)]
config: Option<PathBuf>,
#[clap(long)]
roudi_path: Option<PathBuf>,
#[clap(long)]
coordinator_path: Option<PathBuf>,
#[clap(long)]
daemon_path: Option<PathBuf>,
},
Destroy {
#[clap(long)]
@@ -126,12 +123,12 @@ fn main() -> eyre::Result<()> {
Command::Dashboard => todo!(),
Command::Up {
config,
roudi_path,
coordinator_path,
daemon_path,
} => up::up(
config.as_deref(),
roudi_path.as_deref(),
coordinator_path.as_deref(),
daemon_path.as_deref(),
)?,
Command::Start { dataflow, name } => start_dataflow(dataflow, name, &mut session)?,
Command::List => list(&mut session)?,
@@ -169,14 +166,15 @@ fn start_dataflow(
)
.wrap_err("failed to send start dataflow message")?;

let result: StartDataflowResult =
let result: ControlRequestReply =
serde_json::from_slice(&reply_raw).wrap_err("failed to parse reply")?;
match result {
StartDataflowResult::Ok { uuid } => {
ControlRequestReply::DataflowStarted { uuid } => {
println!("{uuid}");
Ok(())
}
StartDataflowResult::Error(err) => bail!(err),
ControlRequestReply::Error(err) => bail!("{err}"),
other => bail!("unexpected start dataflow reply: {other:?}"),
}
}

@@ -206,11 +204,12 @@ fn stop_dataflow(
.unwrap(),
)
.wrap_err("failed to send dataflow stop message")?;
let result: StopDataflowResult =
let result: ControlRequestReply =
serde_json::from_slice(&reply_raw).wrap_err("failed to parse reply")?;
match result {
StopDataflowResult::Ok => Ok(()),
StopDataflowResult::Error(err) => bail!(err),
ControlRequestReply::DataflowStopped { uuid: _ } => Ok(()),
ControlRequestReply::Error(err) => bail!("{err}"),
other => bail!("unexpected stop dataflow reply: {other:?}"),
}
}

@@ -221,11 +220,12 @@ fn stop_dataflow_by_name(
let reply_raw = control_connection(session)?
.request(&serde_json::to_vec(&ControlRequest::StopByName { name }).unwrap())
.wrap_err("failed to send dataflow stop_by_name message")?;
let result: StopDataflowResult =
let result: ControlRequestReply =
serde_json::from_slice(&reply_raw).wrap_err("failed to parse reply")?;
match result {
StopDataflowResult::Ok => Ok(()),
StopDataflowResult::Error(err) => bail!(err),
ControlRequestReply::DataflowStopped { uuid: _ } => Ok(()),
ControlRequestReply::Error(err) => bail!("{err}"),
other => bail!("unexpected stop dataflow reply: {other:?}"),
}
}

@@ -250,11 +250,12 @@ fn query_running_dataflows(
let reply_raw = control_connection(session)?
.request(&serde_json::to_vec(&ControlRequest::List).unwrap())
.wrap_err("failed to send list message")?;
let reply: ListDataflowResult =
let reply: ControlRequestReply =
serde_json::from_slice(&reply_raw).wrap_err("failed to parse reply")?;
let ids = match reply {
ListDataflowResult::Ok { dataflows } => dataflows,
ListDataflowResult::Error(err) => bail!(err),
ControlRequestReply::DataflowList { dataflows } => dataflows,
ControlRequestReply::Error(err) => bail!("{err}"),
other => bail!("unexpected list dataflow reply: {other:?}"),
};

Ok(ids)


+ 9 - 0 binaries/cli/src/template/python/operator/operator-template.py

@@ -12,6 +12,15 @@ class Operator:
"""Called on initialisation"""
pass

def on_event(
self,
dora_event: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
if dora_event["type"] == "INPUT":
return self.on_input(dora_event, send_output)
return DoraStatus.CONTINUE

def on_input(
self,
dora_input: dict,


+ 12 - 6 binaries/cli/src/template/rust/node/main-template.rs

@@ -1,13 +1,19 @@
use dora_node_api::DoraNode;
use dora_node_api::{DoraNode, Event};
use std::error::Error;

fn main() -> Result<(), Box<dyn Error>> {
let mut node = DoraNode::init_from_env()?;
let inputs = node.inputs()?;
let (mut node, mut events) = DoraNode::init_from_env()?;

while let Ok(input) = inputs.recv() {
match input.id.as_str() {
other => eprintln!("Received input `{other}`"),
while let Some(event) = events.recv() {
match event {
Event::Input {
id,
metadata,
data: _,
} => match id.as_str() {
other => eprintln!("Received input `{other}`"),
},
_ => {}
}
}



+ 9 - 6 binaries/cli/src/template/rust/operator/lib-template.rs

@@ -1,4 +1,4 @@
use dora_operator_api::{register_operator, DoraOperator, DoraOutputSender, DoraStatus};
use dora_operator_api::{register_operator, DoraOperator, DoraOutputSender, DoraStatus, Event};

register_operator!(ExampleOperator);

@@ -8,15 +8,18 @@ struct ExampleOperator {
}

impl DoraOperator for ExampleOperator {
fn on_input(
fn on_event(
&mut self,
id: &str,
data: &[u8],
event: &Event,
output_sender: &mut DoraOutputSender,
) -> Result<DoraStatus, String> {
match id {
other => eprintln!("Received input {other}"),
match event {
Event::Input { id, data } => match id {
other => eprintln!("Received input {other}"),
},
_ => {}
}

Ok(DoraStatus::Continue)
}
}

+ 22 - 45 binaries/cli/src/up.rs

@@ -1,35 +1,31 @@
use crate::{check::coordinator_running, control_connection};
use crate::{
check::{coordinator_running, daemon_running},
control_connection,
};
use communication_layer_request_reply::TcpRequestReplyConnection;
use dora_core::topics::ControlRequest;
use eyre::{bail, Context};
use std::{fs, path::Path, process::Command};
use sysinfo::{ProcessExt, SystemExt};
use eyre::Context;
use std::{fs, path::Path, process::Command, time::Duration};

#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct UpConfig {
iceoryx: bool,
}

impl Default for UpConfig {
fn default() -> Self {
Self { iceoryx: true }
}
}
#[derive(Debug, Default, serde::Serialize, serde::Deserialize)]
struct UpConfig {}

pub(crate) fn up(
config_path: Option<&Path>,
roudi: Option<&Path>,
coordinator: Option<&Path>,
daemon: Option<&Path>,
) -> eyre::Result<()> {
let UpConfig { iceoryx } = parse_dora_config(config_path)?;
let UpConfig {} = parse_dora_config(config_path)?;

if !coordinator_running()? {
start_coordinator(coordinator).wrap_err("failed to start dora-coordinator")?;
// sleep a bit until the coordinator accepts connections
while !coordinator_running()? {
std::thread::sleep(Duration::from_millis(50));
}
}

if iceoryx {
// try to start roudi
start_roudi(roudi).wrap_err("failed to start iceoryx roudi daemon")?;
if !daemon_running()? {
start_daemon(daemon).wrap_err("failed to start dora-daemon")?;
}

Ok(())
@@ -39,7 +35,7 @@ pub(crate) fn destroy(
config_path: Option<&Path>,
session: &mut Option<Box<TcpRequestReplyConnection>>,
) -> Result<(), eyre::ErrReport> {
let UpConfig { iceoryx } = parse_dora_config(config_path)?;
let UpConfig {} = parse_dora_config(config_path)?;

if coordinator_running()? {
// send destroy command to dora-coordinator
@@ -51,25 +47,6 @@ pub(crate) fn destroy(
eprintln!("The dora-coordinator is not running");
}

if iceoryx {
// kill iox-roudi process
let system = sysinfo::System::new_all();
let processes: Vec<_> = system.processes_by_exact_name("iox-roudi").collect();
if processes.is_empty() {
eprintln!("No `iox-roudi` process found");
} else if processes.len() == 1 {
let process = processes[0];
let success = process.kill();
if success {
println!("Killed `iox-roudi` process");
} else {
bail!("failed to kill iox-roudi process");
}
} else {
bail!("multiple iox-roudi processes found, please kill the correct processes manually");
}
}

Ok(())
}

@@ -99,14 +76,14 @@ fn start_coordinator(coordinator: Option<&Path>) -> eyre::Result<()> {
Ok(())
}

fn start_roudi(roudi: Option<&Path>) -> eyre::Result<()> {
let roudi = roudi.unwrap_or_else(|| Path::new("iox-roudi"));
fn start_daemon(daemon: Option<&Path>) -> eyre::Result<()> {
let daemon = daemon.unwrap_or_else(|| Path::new("dora-daemon"));

let mut cmd = Command::new(roudi);
let mut cmd = Command::new(daemon);
cmd.spawn()
.wrap_err_with(|| format!("failed to run {}", roudi.display()))?;
.wrap_err_with(|| format!("failed to run {}", daemon.display()))?;

println!("started iox-roudi daemon");
println!("started dora daemon");

Ok(())
}

+ 3 - 3 binaries/coordinator/Cargo.toml

@@ -14,20 +14,20 @@ futures = "0.3.21"
serde = { version = "1.0.136", features = ["derive"] }
serde_yaml = "0.8.23"
tokio = { version = "1.24.2", features = ["full"] }
tokio-stream = { version = "0.1.8", features = ["io-util"] }
tokio-stream = { version = "0.1.8", features = ["io-util", "net"] }
tokio-util = { version = "0.7.1", features = ["codec"] }
clap = { version = "3.1.8", features = ["derive"] }
uuid = { version = "1.2.1" }
time = "0.3.9"
rand = "0.8.5"
dora-core = { workspace = true }
dora-message = { path = "../../libraries/message" }
tracing = "0.1.36"
tracing-subscriber = "0.3.15"
futures-concurrency = "5.0.1"
futures-concurrency = "7.1.0"
zenoh = { git = "https://github.com/eclipse-zenoh/zenoh.git", rev = "79a136e4fd90b11ff5d775ced981af53c4f1071b" }
serde_json = "1.0.86"
dora-download = { path = "../../libraries/extensions/download" }
which = "4.3.0"
communication-layer-request-reply = { path = "../../libraries/communication-layer/request-reply" }
thiserror = "1.0.37"
ctrlc = "3.2.5"

+ 107 - 65 binaries/coordinator/src/control.rs

@@ -1,29 +1,45 @@
use crate::Event;
use communication_layer_request_reply::{ListenConnection, RequestReplyLayer, TcpLayer};
use dora_core::topics::ControlRequest;
use eyre::Context;
use futures::{Stream, StreamExt};
use std::{
io::{self, ErrorKind},
net::SocketAddr,
use crate::{
tcp_utils::{tcp_receive, tcp_send},
Event,
};
use dora_core::topics::{ControlRequest, ControlRequestReply};
use eyre::{eyre, Context};
use futures::{
future::{self, Either},
stream::FuturesUnordered,
FutureExt, Stream, StreamExt,
};
use futures_concurrency::future::Race;
use std::{io::ErrorKind, net::SocketAddr};
use tokio::{
net::{TcpListener, TcpStream},
sync::{mpsc, oneshot},
task::JoinHandle,
};
use tokio::sync::{mpsc, oneshot};
use tokio_stream::wrappers::ReceiverStream;

pub(crate) async fn control_events(
control_listen_addr: SocketAddr,
tasks: &FuturesUnordered<JoinHandle<()>>,
) -> eyre::Result<impl Stream<Item = Event>> {
let (tx, rx) = mpsc::channel(10);

std::thread::spawn(move || listen(control_listen_addr, tx));
let (finish_tx, mut finish_rx) = mpsc::channel(1);
tasks.push(tokio::spawn(listen(control_listen_addr, tx, finish_tx)));
tasks.push(tokio::spawn(async move {
while let Some(()) = finish_rx.recv().await {}
}));

Ok(ReceiverStream::new(rx).map(Event::Control))
}

fn listen(control_listen_addr: SocketAddr, tx: mpsc::Sender<ControlEvent>) {
let mut com_layer = TcpLayer::new();
let result = com_layer
.listen(control_listen_addr)
async fn listen(
control_listen_addr: SocketAddr,
tx: mpsc::Sender<ControlEvent>,
_finish_tx: mpsc::Sender<()>,
) {
let result = TcpListener::bind(control_listen_addr)
.await
.wrap_err("failed to listen for control messages");
let incoming = match result {
Ok(incoming) => incoming,
@@ -33,11 +49,20 @@ fn listen(control_listen_addr: SocketAddr, tx: mpsc::Sender<ControlEvent>) {
}
};

for connection in incoming {
loop {
let new_connection = incoming.accept().map(Either::Left);
let coordinator_stop = tx.closed().map(Either::Right);
let connection = match (new_connection, coordinator_stop).race().await {
future::Either::Left(connection) => connection,
future::Either::Right(()) => {
// coordinator was stopped
break;
}
};
match connection.wrap_err("failed to connect") {
Ok(connection) => {
Ok((connection, _)) => {
let tx = tx.clone();
std::thread::spawn(|| handle_requests(connection, tx));
tokio::spawn(handle_requests(connection, tx, _finish_tx.clone()));
}
Err(err) => {
if tx.blocking_send(err.into()).is_err() {
@@ -48,75 +73,92 @@ fn listen(control_listen_addr: SocketAddr, tx: mpsc::Sender<ControlEvent>) {
}
}

fn handle_requests(
mut connection: Box<
dyn ListenConnection<RequestData = Vec<u8>, ReplyData = Vec<u8>, Error = std::io::Error>,
>,
async fn handle_requests(
mut connection: TcpStream,
tx: mpsc::Sender<ControlEvent>,
_finish_tx: mpsc::Sender<()>,
) {
loop {
let tx = tx.clone();
let result = connection.handle_next(Box::new(move |raw| {
let (reply, reply_rx) = oneshot::channel();
let request = match serde_json::from_slice(&raw) {
Ok(request) => ControlEvent::IncomingRequest {
request,
reply_sender: reply,
let next_request = tcp_receive(&mut connection).map(Either::Left);
let coordinator_stopped = tx.closed().map(Either::Right);
let raw = match (next_request, coordinator_stopped).race().await {
Either::Right(()) => break,
Either::Left(request) => match request {
Ok(message) => message,
Err(err) => match err.kind() {
ErrorKind::UnexpectedEof => {
tracing::trace!("Control connection closed");
break;
}
err => {
let err = eyre!(err).wrap_err("failed to receive incoming message");
tracing::error!("{err}");
break;
}
},
Err(err) => return Err(io::Error::new(ErrorKind::Other, HandlerError::from(err))),
},
};

let result =
match serde_json::from_slice(&raw).wrap_err("failed to deserialize incoming message") {
Ok(request) => handle_request(request, &tx).await,
Err(err) => Err(err),
};
if tx.blocking_send(request).is_err() {
return Err(io::Error::new(
io::ErrorKind::Other,
HandlerError::ServerStopped,
));
}

let Ok(reply) = reply_rx.blocking_recv() else {
return Err(io::Error::new(
io::ErrorKind::Other,
HandlerError::ServerStopped,
));
let reply = result.unwrap_or_else(|err| ControlRequestReply::Error(format!("{err}")));
let serialized =
match serde_json::to_vec(&reply).wrap_err("failed to serialize ControlRequestReply") {
Ok(s) => s,
Err(err) => {
tracing::error!("{err:?}");
break;
}
};
Ok(reply)
}));
if let Err(err) = result {
match err.kind() {
match tcp_send(&mut connection, &serialized).await {
Ok(()) => {}
Err(err) => match err.kind() {
ErrorKind::UnexpectedEof => {
tracing::trace!("Control connection closed");
tracing::debug!("Control connection closed while trying to send reply");
break;
}
ErrorKind::Other => {
let inner = err.into_inner().unwrap();
let downcasted = inner.downcast_ref().unwrap();
match downcasted {
HandlerError::ParseError(err) => {
tracing::warn!("failed to parse request: {err}");
}
HandlerError::ServerStopped => break,
}
}
_ => {
tracing::warn!("I/O error while trying to receive control request: {err:?}");
err => {
let err = eyre!(err).wrap_err("failed to send reply");
tracing::error!("{err}");
break;
}
}
},
}

if matches!(reply, ControlRequestReply::CoordinatorStopped) {
break;
}
}
}

#[derive(Debug, thiserror::Error)]
enum HandlerError {
#[error("failed to parse request")]
ParseError(#[from] serde_json::Error),
#[error("server was stopped already")]
ServerStopped,
async fn handle_request(
request: ControlRequest,
tx: &mpsc::Sender<ControlEvent>,
) -> eyre::Result<ControlRequestReply> {
let (reply_tx, reply_rx) = oneshot::channel();
let event = ControlEvent::IncomingRequest {
request,
reply_sender: reply_tx,
};

if tx.send(event).await.is_err() {
return Ok(ControlRequestReply::CoordinatorStopped);
}

reply_rx
.await
.unwrap_or(Ok(ControlRequestReply::CoordinatorStopped))
}

#[derive(Debug)]
pub enum ControlEvent {
IncomingRequest {
request: ControlRequest,
reply_sender: oneshot::Sender<Vec<u8>>,
reply_sender: oneshot::Sender<eyre::Result<ControlRequestReply>>,
},
Error(eyre::Report),
}


+ 339 - 155 binaries/coordinator/src/lib.rs

@@ -1,41 +1,44 @@
use crate::run::spawn_dataflow;
use crate::{
run::spawn_dataflow,
tcp_utils::{tcp_receive, tcp_send},
};
use control::ControlEvent;
use dora_core::{
config::CommunicationConfig,
coordinator_messages::RegisterResult,
daemon_messages::{DaemonCoordinatorEvent, DaemonCoordinatorReply},
topics::{
control_socket_addr, ControlRequest, DataflowId, ListDataflowResult, StartDataflowResult,
StopDataflowResult,
control_socket_addr, ControlRequest, ControlRequestReply, DataflowId,
DORA_COORDINATOR_PORT_DEFAULT,
},
};
use dora_node_api::{communication, manual_stop_publisher};
use eyre::{bail, eyre, Result, WrapErr};
use futures::StreamExt;
use eyre::{bail, eyre, ContextCompat, WrapErr};
use futures::{stream::FuturesUnordered, Stream, StreamExt};
use futures_concurrency::stream::Merge;
use run::{await_tasks, SpawnedDataflow};
use run::SpawnedDataflow;
use std::{
collections::HashMap,
collections::{BTreeSet, HashMap},
path::{Path, PathBuf},
time::Duration,
};
use tokio_stream::wrappers::ReceiverStream;
use tokio::{net::TcpStream, sync::mpsc, task::JoinHandle};
use tokio_stream::wrappers::{ReceiverStream, TcpListenerStream};
use uuid::Uuid;

mod control;
mod listener;
mod run;
mod tcp_utils;

#[derive(Debug, Clone, clap::Parser)]
#[clap(about = "Dora coordinator")]
pub struct Args {
#[clap(long)]
pub runtime: Option<PathBuf>,
#[clap(long)]
pub run_dataflow: Option<PathBuf>,
}

pub async fn run(args: Args) -> eyre::Result<()> {
let Args {
runtime,
run_dataflow,
} = args;
let Args { runtime } = args;

let runtime_path = runtime.unwrap_or_else(|| {
std::env::args()
@@ -45,50 +48,124 @@ pub async fn run(args: Args) -> eyre::Result<()> {
.with_file_name("dora-runtime")
});

match run_dataflow {
Some(path) => {
// start the given dataflow directly
run::run_dataflow(&path, &runtime_path)
.await
.wrap_err_with(|| format!("failed to run dataflow at {}", path.display()))?;
}
None => {
// start in daemon mode
start(&runtime_path).await?;
let mut tasks = FuturesUnordered::new();

// start in daemon mode
start(&runtime_path, &tasks).await?;

tracing::debug!("coordinator main loop finished, waiting on spawned tasks");
while let Some(join_result) = tasks.next().await {
if let Err(err) = join_result {
tracing::error!("task panicked: {err}");
}
}
tracing::debug!("all spawned tasks finished, exiting..");

Ok(())
}

async fn start(runtime_path: &Path) -> eyre::Result<()> {
let (dataflow_events_tx, dataflow_events) = tokio::sync::mpsc::channel(2);
let mut dataflow_events_tx = Some(dataflow_events_tx);
let dataflow_events = ReceiverStream::new(dataflow_events);
async fn start(runtime_path: &Path, tasks: &FuturesUnordered<JoinHandle<()>>) -> eyre::Result<()> {
let ctrlc_events = set_up_ctrlc_handler()?;

let (control_events, control_events_abort) = futures::stream::abortable(
control::control_events(control_socket_addr())
.await
.wrap_err("failed to create control events")?,
let listener = listener::create_listener(DORA_COORDINATOR_PORT_DEFAULT).await?;
let new_daemon_connections = TcpListenerStream::new(listener).map(|c| {
c.map(Event::NewDaemonConnection)
.wrap_err("failed to open connection")
.unwrap_or_else(Event::DaemonConnectError)
});

let (daemon_events_tx, daemon_events) = tokio::sync::mpsc::channel(2);
let mut daemon_events_tx = Some(daemon_events_tx);
let daemon_events = ReceiverStream::new(daemon_events);

let control_events = control::control_events(control_socket_addr(), tasks)
.await
.wrap_err("failed to create control events")?;

let daemon_watchdog_interval =
tokio_stream::wrappers::IntervalStream::new(tokio::time::interval(Duration::from_secs(1)))
.map(|_| Event::DaemonWatchdogInterval);

// events that should be aborted on `dora destroy`
let (abortable_events, abort_handle) = futures::stream::abortable(
(
control_events,
new_daemon_connections,
ctrlc_events,
daemon_watchdog_interval,
)
.merge(),
);

let mut events = (dataflow_events, control_events).merge();
let mut events = (abortable_events, daemon_events).merge();

let mut running_dataflows = HashMap::new();
let mut running_dataflows: HashMap<Uuid, RunningDataflow> = HashMap::new();
let mut daemon_connections: HashMap<_, TcpStream> = HashMap::new();

while let Some(event) = events.next().await {
tracing::trace!("Handling event {event:?}");
if event.log() {
tracing::trace!("Handling event {event:?}");
}
match event {
Event::NewDaemonConnection(connection) => {
connection.set_nodelay(true)?;
let events_tx = daemon_events_tx.clone();
if let Some(events_tx) = events_tx {
let task = tokio::spawn(listener::handle_connection(connection, events_tx));
tasks.push(task);
} else {
tracing::warn!(
"ignoring new daemon connection because events_tx was closed already"
);
}
}
Event::DaemonConnectError(err) => {
tracing::warn!("{:?}", err.wrap_err("failed to connect to dora-daemon"));
}
Event::Daemon(event) => {
match event {
DaemonEvent::Register {
machine_id,
mut connection,
} => {
let reply = RegisterResult::Ok;
match tcp_send(&mut connection, &serde_json::to_vec(&reply)?).await {
Ok(()) => {
let previous =
daemon_connections.insert(machine_id.clone(), connection);
if let Some(_previous) = previous {
tracing::info!("closing previous connection `{machine_id}` on new register");
}
}
Err(err) => {
tracing::warn!("failed to register daemon connection for machine `{machine_id}`: {err}");
}
}
}
}
}
Event::Dataflow { uuid, event } => match event {
DataflowEvent::Finished { result } => {
running_dataflows.remove(&uuid);
match result {
Ok(()) => {
tracing::info!("dataflow `{uuid}` finished successfully");
DataflowEvent::DataflowFinishedOnMachine { machine_id, result } => {
match running_dataflows.entry(uuid) {
std::collections::hash_map::Entry::Occupied(mut entry) => {
entry.get_mut().machines.remove(&machine_id);
match result {
Ok(()) => {
tracing::info!("dataflow `{uuid}` finished successfully on machine `{machine_id}`");
}
Err(err) => {
let err =
err.wrap_err(format!("error occured in dataflow `{uuid}` on machine `{machine_id}`"));
tracing::error!("{err:?}");
}
}
if entry.get_mut().machines.is_empty() {
entry.remove();
tracing::info!("dataflow `{uuid}` finished");
}
}
Err(err) => {
let err = err.wrap_err(format!("error occured in dataflow `{uuid}`"));
tracing::error!("{err:?}");
std::collections::hash_map::Entry::Vacant(_) => {
tracing::warn!("dataflow not running on DataflowFinishedOnMachine");
}
}
}
@@ -118,35 +195,30 @@ async fn start(runtime_path: &Path) -> eyre::Result<()> {
&dataflow_path,
name,
runtime_path,
&dataflow_events_tx,
&mut daemon_connections,
)
.await?;
Ok(dataflow)
};
let reply = match inner.await {
Ok(dataflow) => {
let uuid = dataflow.uuid;
running_dataflows.insert(uuid, dataflow);
StartDataflowResult::Ok { uuid }
}
Err(err) => {
tracing::error!("{err:?}");
StartDataflowResult::Error(format!("{err:?}"))
}
};
serde_json::to_vec(&reply).unwrap()
inner.await.map(|dataflow| {
let uuid = dataflow.uuid;
running_dataflows.insert(uuid, dataflow);
ControlRequestReply::DataflowStarted { uuid }
})
}
ControlRequest::Stop { dataflow_uuid } => {
let stop = async {
stop_dataflow(&running_dataflows, dataflow_uuid).await?;
stop_dataflow(
&running_dataflows,
dataflow_uuid,
&mut daemon_connections,
)
.await?;
Result::<_, eyre::Report>::Ok(())
};
let reply = match stop.await {
Ok(()) => StopDataflowResult::Ok,
Err(err) => StopDataflowResult::Error(format!("{err:?}")),
};

serde_json::to_vec(&reply).unwrap()
stop.await.map(|()| ControlRequestReply::DataflowStopped {
uuid: dataflow_uuid,
})
}
ControlRequest::StopByName { name } => {
let stop = async {
@@ -164,36 +236,34 @@ async fn start(runtime_path: &Path) -> eyre::Result<()> {
bail!("multiple dataflows found with name `{name}`");
};

stop_dataflow(&running_dataflows, dataflow_uuid).await?;
Result::<_, eyre::Report>::Ok(())
};
let reply = match stop.await {
Ok(()) => StopDataflowResult::Ok,
Err(err) => StopDataflowResult::Error(format!("{err:?}")),
stop_dataflow(
&running_dataflows,
dataflow_uuid,
&mut daemon_connections,
)
.await?;
Result::<_, eyre::Report>::Ok(dataflow_uuid)
};
serde_json::to_vec(&reply).unwrap()
stop.await
.map(|uuid| ControlRequestReply::DataflowStopped { uuid })
}
ControlRequest::Destroy => {
tracing::info!("Received destroy command");

control_events_abort.abort();

// ensure that no new dataflows can be started
dataflow_events_tx = None;

// stop all running dataflows
for &uuid in running_dataflows.keys() {
stop_dataflow(&running_dataflows, uuid).await?;
}

b"ok".as_slice().into()
handle_destroy(
&running_dataflows,
&mut daemon_connections,
&abort_handle,
&mut daemon_events_tx,
)
.await
.map(|()| ControlRequestReply::DestroyOk)
}
ControlRequest::List => {
let mut dataflows: Vec<_> = running_dataflows.values().collect();
dataflows.sort();
dataflows.sort_by_key(|d| (&d.name, d.uuid));

let reply = ListDataflowResult::Ok {
Ok(ControlRequestReply::DataflowList {
dataflows: dataflows
.into_iter()
.map(|d| DataflowId {
@@ -201,15 +271,49 @@ async fn start(runtime_path: &Path) -> eyre::Result<()> {
name: d.name.clone(),
})
.collect(),
};

serde_json::to_vec(&reply).unwrap()
})
}
ControlRequest::DaemonConnected => {
let running = !daemon_connections.is_empty();
Ok(ControlRequestReply::DaemonConnected(running))
}
};
let _ = reply_sender.send(reply);
}
ControlEvent::Error(err) => tracing::error!("{err:?}"),
},
Event::DaemonWatchdogInterval => {
let mut disconnected = BTreeSet::new();
for (machine_id, connection) in &mut daemon_connections {
let result: eyre::Result<()> =
tokio::time::timeout(Duration::from_millis(100), send_watchdog_message(connection))
.await
.wrap_err("timeout")
.and_then(|r| r)
.wrap_err_with(|| {
    format!("daemon at `{machine_id}` did not react as expected to watchdog message")
});
if let Err(err) = result {
tracing::warn!("{err:?}");
disconnected.insert(machine_id.clone());
}
}
if !disconnected.is_empty() {
tracing::info!("Disconnecting daemons that failed watchdog: {disconnected:?}");
for machine_id in disconnected {
daemon_connections.remove(&machine_id);
}
}
}
Event::CtrlC => {
tracing::info!("Destroying coordinator after receiving Ctrl-C signal");
handle_destroy(
&running_dataflows,
&mut daemon_connections,
&abort_handle,
&mut daemon_events_tx,
)
.await?;
}
}
}

@@ -218,68 +322,110 @@ async fn start(runtime_path: &Path) -> eyre::Result<()> {
Ok(())
}

fn set_up_ctrlc_handler() -> Result<impl Stream<Item = Event>, eyre::ErrReport> {
let (ctrlc_tx, ctrlc_rx) = mpsc::channel(1);

let mut ctrlc_sent = false;
ctrlc::set_handler(move || {
if ctrlc_sent {
tracing::warn!("received second ctrlc signal -> aborting immediately");
std::process::abort();
} else {
tracing::info!("received ctrlc signal");
if ctrlc_tx.blocking_send(Event::CtrlC).is_err() {
tracing::error!("failed to report ctrl-c event to dora-coordinator");
}

ctrlc_sent = true;
}
})
.wrap_err("failed to set ctrl-c handler")?;

Ok(ReceiverStream::new(ctrlc_rx))
}

async fn handle_destroy(
running_dataflows: &HashMap<Uuid, RunningDataflow>,
daemon_connections: &mut HashMap<String, TcpStream>,
abortable_events: &futures::stream::AbortHandle,
daemon_events_tx: &mut Option<mpsc::Sender<Event>>,
) -> Result<(), eyre::ErrReport> {
abortable_events.abort();
for &uuid in running_dataflows.keys() {
stop_dataflow(running_dataflows, uuid, daemon_connections).await?;
}
destroy_daemons(daemon_connections).await?;
*daemon_events_tx = None;
Ok(())
}

async fn send_watchdog_message(connection: &mut TcpStream) -> eyre::Result<()> {
let message = serde_json::to_vec(&DaemonCoordinatorEvent::Watchdog).unwrap();

tcp_send(connection, &message)
.await
.wrap_err("failed to send watchdog message to daemon")?;
let reply_raw = tcp_receive(connection)
.await
.wrap_err("failed to receive stop reply from daemon")?;

match serde_json::from_slice(&reply_raw)
.wrap_err("failed to deserialize stop reply from daemon")?
{
DaemonCoordinatorReply::WatchdogAck => Ok(()),
other => bail!("unexpected reply after sending `watchdog`: {other:?}"),
}
}

struct RunningDataflow {
name: Option<String>,
uuid: Uuid,
communication_config: CommunicationConfig,
/// The IDs of the machines that the dataflow is running on.
machines: BTreeSet<String>,
}

impl PartialEq for RunningDataflow {
fn eq(&self, other: &Self) -> bool {
self.name == other.name && self.uuid == other.uuid
self.name == other.name && self.uuid == other.uuid && self.machines == other.machines
}
}

impl Eq for RunningDataflow {}

impl PartialOrd for RunningDataflow {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
match self.name.partial_cmp(&other.name) {
Some(core::cmp::Ordering::Equal) => {}
ord => return ord,
}
self.uuid.partial_cmp(&other.uuid)
}
}

impl Ord for RunningDataflow {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
match self.name.cmp(&other.name) {
core::cmp::Ordering::Equal => {}
ord => return ord,
}
self.uuid.cmp(&other.uuid)
}
}

async fn stop_dataflow(
running_dataflows: &HashMap<Uuid, RunningDataflow>,
uuid: Uuid,
daemon_connections: &mut HashMap<String, TcpStream>,
) -> eyre::Result<()> {
let Some(dataflow) = running_dataflows.get(&uuid) else {
bail!("No running dataflow found with UUID `{uuid}`")
};
let message = serde_json::to_vec(&DaemonCoordinatorEvent::StopDataflow { dataflow_id: uuid })?;

for machine_id in &dataflow.machines {
let daemon_connection = daemon_connections
.get_mut(machine_id)
.wrap_err("no daemon connection")?; // TODO: take from dataflow spec
tcp_send(daemon_connection, &message)
.await
.wrap_err("failed to send stop message to daemon")?;

// wait for reply
let reply_raw = tcp_receive(daemon_connection)
.await
.wrap_err("failed to receive stop reply from daemon")?;
match serde_json::from_slice(&reply_raw)
.wrap_err("failed to deserialize stop reply from daemon")?
{
DaemonCoordinatorReply::StopResult(result) => result
.map_err(|e| eyre!(e))
.wrap_err("failed to stop dataflow")?,
other => bail!("unexpected reply after sending stop: {other:?}"),
}
}
tracing::info!("successfully stopped dataflow `{uuid}`");
Ok(())
}

@@ -287,46 +433,84 @@ async fn start_dataflow(
path: &Path,
name: Option<String>,
runtime_path: &Path,
daemon_connections: &mut HashMap<String, TcpStream>,
) -> eyre::Result<RunningDataflow> {
let SpawnedDataflow {
uuid,
communication_config,
machines,
} = spawn_dataflow(&runtime_path, path, daemon_connections).await?;
Ok(RunningDataflow {
uuid,
name,
communication_config,
machines,
})
}

async fn destroy_daemons(daemon_connections: &mut HashMap<String, TcpStream>) -> eyre::Result<()> {
let message = serde_json::to_vec(&DaemonCoordinatorEvent::Destroy)?;

for (machine_id, mut daemon_connection) in daemon_connections.drain() {
tcp_send(&mut daemon_connection, &message)
.await
.wrap_err("failed to send destroy message to daemon")?;

// wait for reply
let reply_raw = tcp_receive(&mut daemon_connection)
.await
.wrap_err("failed to receive destroy reply from daemon")?;
match serde_json::from_slice(&reply_raw)
.wrap_err("failed to deserialize destroy reply from daemon")?
{
DaemonCoordinatorReply::DestroyResult(result) => result
.map_err(|e| eyre!(e))
.wrap_err("failed to destroy dataflow")?,
other => bail!("unexpected reply after sending `destroy`: {other:?}"),
}

tracing::info!("successfully destroyed daemon `{machine_id}`");
}

Ok(())
}

#[derive(Debug)]
pub enum Event {
NewDaemonConnection(TcpStream),
DaemonConnectError(eyre::Report),
Dataflow { uuid: Uuid, event: DataflowEvent },
Control(ControlEvent),
Daemon(DaemonEvent),
DaemonWatchdogInterval,
CtrlC,
}

impl Event {
/// Whether this event should be logged.
#[allow(clippy::match_like_matches_macro)]
pub fn log(&self) -> bool {
match self {
Event::DaemonWatchdogInterval => false,
_ => true,
}
}
}

#[derive(Debug)]
pub enum DataflowEvent {
DataflowFinishedOnMachine {
machine_id: String,
result: eyre::Result<()>,
},
}

#[derive(Debug)]
pub enum DaemonEvent {
Register {
machine_id: String,
connection: TcpStream,
},
}

+ 83
- 0
binaries/coordinator/src/listener.rs

@@ -0,0 +1,83 @@
use crate::{
tcp_utils::{tcp_receive, tcp_send},
DaemonEvent, DataflowEvent, Event,
};
use dora_core::coordinator_messages;
use eyre::{eyre, Context};
use std::{io::ErrorKind, net::Ipv4Addr, time::Duration};
use tokio::{
net::{TcpListener, TcpStream},
sync::mpsc,
};

pub async fn create_listener(port: u16) -> eyre::Result<TcpListener> {
let localhost = Ipv4Addr::new(127, 0, 0, 1);
let socket = match TcpListener::bind((localhost, port)).await {
Ok(socket) => socket,
Err(err) => {
return Err(eyre::Report::new(err).wrap_err("failed to create local TCP listener"))
}
};
Ok(socket)
}

pub async fn handle_connection(mut connection: TcpStream, events_tx: mpsc::Sender<Event>) {
loop {
// receive the next message and parse it
let raw = match tcp_receive(&mut connection).await {
Ok(data) => data,
Err(err) if err.kind() == ErrorKind::UnexpectedEof => {
break;
}
Err(err) => {
tracing::error!("{err:?}");
continue;
}
};
let message: coordinator_messages::CoordinatorRequest =
match serde_json::from_slice(&raw).wrap_err("failed to deserialize daemon message") {
Ok(e) => e,
Err(err) => {
tracing::warn!("{err:?}");
continue;
}
};

// handle the message and translate it to a DaemonEvent
match message {
coordinator_messages::CoordinatorRequest::Register { machine_id } => {
let event = DaemonEvent::Register {
machine_id,
connection,
};
let _ = events_tx.send(Event::Daemon(event)).await;
break;
}
coordinator_messages::CoordinatorRequest::Event { machine_id, event } => match event {
coordinator_messages::DaemonEvent::AllNodesFinished {
dataflow_id,
result,
} => {
let event = Event::Dataflow {
uuid: dataflow_id,
event: DataflowEvent::DataflowFinishedOnMachine {
machine_id,
result: result.map_err(|e| eyre!(e)),
},
};
if events_tx.send(event).await.is_err() {
break;
}
}
coordinator_messages::DaemonEvent::Watchdog => {
let reply = serde_json::to_vec(&coordinator_messages::WatchdogAck).unwrap();
_ = tokio::time::timeout(
Duration::from_millis(10),
tcp_send(&mut connection, &reply),
)
.await;
}
},
};
}
}

+ 0
- 96
binaries/coordinator/src/run/custom.rs

@@ -1,96 +0,0 @@
use super::command_init_common_env;
use dora_core::{
config::NodeId,
descriptor::{self, resolve_path, source_is_url, EnvValue},
};
use dora_download::download_file;
use eyre::{bail, eyre, WrapErr};
use std::{collections::BTreeMap, env::consts::EXE_EXTENSION, path::Path};

const SHELL_SOURCE: &str = "shell";

#[tracing::instrument]
pub(super) async fn spawn_custom_node(
node_id: NodeId,
node: &descriptor::CustomNode,
envs: &Option<BTreeMap<String, EnvValue>>,
communication: &dora_core::config::CommunicationConfig,
working_dir: &Path,
) -> eyre::Result<tokio::task::JoinHandle<eyre::Result<(), eyre::Error>>> {
let resolved_path = if source_is_url(&node.source) {
// try to download the shared library
let target_path = Path::new("build")
.join(node_id.to_string())
.with_extension(EXE_EXTENSION);
download_file(&node.source, &target_path)
.await
.wrap_err("failed to download custom node")?;
Ok(target_path.clone())
} else {
resolve_path(&node.source, working_dir)
};

let mut command = if let Ok(path) = &resolved_path {
let mut command = tokio::process::Command::new(path);
if let Some(args) = &node.args {
command.args(args.split_ascii_whitespace());
}
command
} else if node.source == SHELL_SOURCE {
if cfg!(target_os = "windows") {
let mut cmd = tokio::process::Command::new("cmd");
cmd.args(["/C", &node.args.clone().unwrap_or_default()]);
cmd
} else {
let mut cmd = tokio::process::Command::new("sh");
cmd.args(["-c", &node.args.clone().unwrap_or_default()]);
cmd
}
} else {
bail!("could not understand node source: {}", node.source);
};

command_init_common_env(&mut command, &node_id, communication)?;
command.env(
"DORA_NODE_RUN_CONFIG",
serde_yaml::to_string(&node.run_config)
.wrap_err("failed to serialize custom node run config")?,
);
command.current_dir(working_dir);

// Injecting the env variable defined in the `yaml` into
// the node runtime.
if let Some(envs) = envs {
for (key, value) in envs {
command.env(key, value.to_string());
}
}

let mut child = command.spawn().wrap_err_with(|| {
if let Ok(path) = resolved_path {
format!(
"failed to run source path: `{}` with args `{}`",
path.display(),
node.args.as_deref().unwrap_or_default()
)
} else {
format!(
"failed to run command: `{}` with args `{}`",
node.source,
node.args.as_deref().unwrap_or_default()
)
}
})?;
let result = tokio::spawn(async move {
let status = child.wait().await.context("child process failed")?;
if status.success() {
tracing::info!("node {node_id} finished");
Ok(())
} else if let Some(code) = status.code() {
Err(eyre!("node {node_id} failed with exit code: {code}"))
} else {
Err(eyre!("node {node_id} failed (unknown exit code)"))
}
});
Ok(result)
}

+ 51
- 106
binaries/coordinator/src/run/mod.rs

@@ -1,25 +1,24 @@
use crate::tcp_utils::{tcp_receive, tcp_send};

use dora_core::{
config::CommunicationConfig,
daemon_messages::{DaemonCoordinatorEvent, DaemonCoordinatorReply, SpawnDataflowNodes},
descriptor::{CoreNodeKind, Descriptor},
};
use eyre::{bail, eyre, ContextCompat, WrapErr};
use std::{
collections::{BTreeSet, HashMap},
env::consts::EXE_EXTENSION,
path::Path,
};
use tokio::net::TcpStream;
use uuid::Uuid;

pub async fn spawn_dataflow(
runtime: &Path,
dataflow_path: &Path,
daemon_connections: &mut HashMap<String, TcpStream>,
) -> eyre::Result<SpawnedDataflow> {
let mut runtime = runtime.with_extension(EXE_EXTENSION);
let descriptor = read_descriptor(dataflow_path).await.wrap_err_with(|| {
format!(
@@ -34,7 +33,6 @@ pub async fn spawn_dataflow(runtime: &Path, dataflow_path: &Path) -> eyre::Resul
.ok_or_else(|| eyre!("canonicalized dataflow path has no parent"))?
.to_owned();
let nodes = descriptor.resolve_aliases();
let uuid = Uuid::new_v4();
let communication_config = {
let mut config = descriptor.communication;
@@ -59,87 +57,51 @@ pub async fn spawn_dataflow(runtime: &Path, dataflow_path: &Path) -> eyre::Resul
}
}
}
let spawn_command = SpawnDataflowNodes {
dataflow_id: uuid,
working_dir,
nodes,
daemon_communication: descriptor.daemon_config,
};
let message = serde_json::to_vec(&DaemonCoordinatorEvent::Spawn(spawn_command))?;

// TODO allow partitioning a dataflow across multiple machines
let machine_id = "";
let machines = [machine_id.to_owned()].into();

let daemon_connection = daemon_connections
.get_mut(machine_id)
.wrap_err("no daemon connection")?; // TODO: take from dataflow spec
tcp_send(daemon_connection, &message)
.await
.wrap_err("failed to send spawn message to daemon")?;

// wait for reply
let reply_raw = tcp_receive(daemon_connection)
.await
.wrap_err("failed to receive spawn reply from daemon")?;
match serde_json::from_slice(&reply_raw)
.wrap_err("failed to deserialize spawn reply from daemon")?
{
DaemonCoordinatorReply::SpawnResult(result) => result
.map_err(|e| eyre!(e))
.wrap_err("failed to spawn dataflow")?,
_ => bail!("unexpected reply"),
}
tracing::info!("successfully spawned dataflow `{uuid}`");

Ok(SpawnedDataflow {
communication_config,
uuid,
machines,
})
}

pub struct SpawnedDataflow {
pub uuid: Uuid,
pub communication_config: CommunicationConfig,
pub tasks: FuturesUnordered<tokio::task::JoinHandle<Result<(), eyre::ErrReport>>>,
}

pub async fn await_tasks(
mut tasks: FuturesUnordered<tokio::task::JoinHandle<Result<(), eyre::ErrReport>>>,
) -> eyre::Result<()> {
while let Some(task_result) = tasks.next().await {
task_result
.wrap_err("failed to join async task")?
.wrap_err("One node failed!")
.unwrap_or_else(|err| warn!("{err}"))
}
Ok(())
pub machines: BTreeSet<String>,
}

async fn read_descriptor(file: &Path) -> Result<Descriptor, eyre::Error> {
@@ -150,20 +112,3 @@ async fn read_descriptor(file: &Path) -> Result<Descriptor, eyre::Error> {
serde_yaml::from_slice(&descriptor_file).context("failed to parse given descriptor")?;
Ok(descriptor)
}

fn command_init_common_env(
command: &mut tokio::process::Command,
node_id: &NodeId,
communication: &dora_core::config::CommunicationConfig,
) -> Result<(), eyre::Error> {
command.env(
"DORA_NODE_ID",
serde_yaml::to_string(&node_id).wrap_err("failed to serialize custom node ID")?,
);
command.env(
"DORA_COMMUNICATION_CONFIG",
serde_yaml::to_string(communication)
.wrap_err("failed to serialize communication config")?,
);
Ok(())
}

+ 0
- 116
binaries/coordinator/src/run/runtime.rs

@@ -1,116 +0,0 @@
use super::command_init_common_env;
use dora_core::{
config::NodeId,
descriptor::{self, EnvValue, OperatorSource},
};
use eyre::{eyre, WrapErr};
use std::{collections::BTreeMap, path::Path};

#[tracing::instrument(skip(node))]
pub fn spawn_runtime_node(
runtime: &Path,
node_id: NodeId,
node: &descriptor::RuntimeNode,
envs: &Option<BTreeMap<String, EnvValue>>,
communication: &dora_core::config::CommunicationConfig,
working_dir: &Path,
) -> eyre::Result<tokio::task::JoinHandle<eyre::Result<(), eyre::Error>>> {
let has_python_operator = node
.operators
.iter()
.any(|x| matches!(x.config.source, OperatorSource::Python { .. }));

let has_other_operator = node
.operators
.iter()
.any(|x| !matches!(x.config.source, OperatorSource::Python { .. }));

let mut command = if has_python_operator && !has_other_operator {
// Use python to spawn runtime if there is a python operator
let mut command = tokio::process::Command::new("python3");
command.args(["-c", "import dora; dora.start_runtime()"]);
command
} else if !has_python_operator && has_other_operator {
// Use default runtime if there is no python operator
tokio::process::Command::new(runtime)
} else {
return Err(eyre!(
"Runtime can not mix Python Operator with other type of operator."
));
};

command_init_common_env(&mut command, &node_id, communication)?;
command.env(
"DORA_OPERATORS",
serde_yaml::to_string(&node.operators)
.wrap_err("failed to serialize custom node run config")?,
);

// Injecting the env variable defined in the `yaml` into
// the node runtime.
if let Some(envs) = &envs {
for (key, value) in envs {
command.env(key, value.to_string());
}
}

command.current_dir(working_dir);

let mut child = command
.spawn()
.wrap_err_with(|| format!("failed to run runtime at `{}`", runtime.display()))?;
let result = tokio::spawn(async move {
let status = child.wait().await.context("child process failed")?;
if status.success() {
tracing::info!("runtime node {node_id} finished");
Ok(())
} else if let Some(code) = status.code() {
if let Some(meaning) = exit_code_meaning(code) {
Err(eyre!(
"runtime node {node_id} failed with exit code: {code}, meaning: {meaning}"
))
} else {
Err(eyre!(
"runtime node {node_id} failed with exit code: {code} with unknwon meaning."
))
}
} else {
Err(eyre!("runtime node {node_id} failed (unknown exit code)"))
}
});
Ok(result)
}

fn exit_code_meaning(code: i32) -> Option<String> {
if cfg!(unix) {
let meaning = match code {
0 => "Success",
1 => "Catchall for general errors",
2 => "Misuse of shell built-ins",
64 => "Usage Error",
65 => "Data Error",
66 => "No Input",
67 => "No User",
68 => "No Host",
69 => "Service Unavailable",
70 => "Software Error",
71 => "OS Error",
72 => "OS File Error",
73 => "Cannot Create",
74 => "IO Error",
75 => "Temporary Failure",
76 => "Protocol Error",
77 => "No Permission",
78 => "Config Error",
126 => "Command invoked cannot execute",
127 => "Command not found",
128 => "Invalid argument to `exit`",
256.. => "Exit status out of range",
_ => "Unknown Error code.",
}
.to_string();
Some(meaning)
} else {
None
}
}

+ 23
- 0
binaries/coordinator/src/tcp_utils.rs

@@ -0,0 +1,23 @@
use tokio::{
io::{AsyncReadExt, AsyncWriteExt},
net::TcpStream,
};

pub async fn tcp_send(connection: &mut TcpStream, message: &[u8]) -> std::io::Result<()> {
let len_raw = (message.len() as u64).to_le_bytes();
connection.write_all(&len_raw).await?;
connection.write_all(message).await?;
connection.flush().await?;
Ok(())
}

pub async fn tcp_receive(connection: &mut TcpStream) -> std::io::Result<Vec<u8>> {
let reply_len = {
let mut raw = [0; 8];
connection.read_exact(&mut raw).await?;
u64::from_le_bytes(raw) as usize
};
let mut reply = vec![0; reply_len];
connection.read_exact(&mut reply).await?;
Ok(reply)
}
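Both helpers implement the same minimal framing: an 8-byte little-endian `u64` length prefix followed by the raw payload. A round-trip sketch (the test scaffolding is assumed; only `tcp_send` and `tcp_receive` come from this file):

// Echoes a single frame back to the client over a loopback socket.
#[tokio::test]
async fn framing_round_trip() -> std::io::Result<()> {
    use tokio::net::{TcpListener, TcpStream};

    let listener = TcpListener::bind(("127.0.0.1", 0)).await?;
    let addr = listener.local_addr()?;
    let server = tokio::spawn(async move {
        let (mut conn, _) = listener.accept().await?;
        let frame = tcp_receive(&mut conn).await?; // reads length prefix, then payload
        tcp_send(&mut conn, &frame).await?; // echo it back
        std::io::Result::Ok(())
    });

    let mut client = TcpStream::connect(addr).await?;
    tcp_send(&mut client, b"hello").await?;
    assert_eq!(tcp_receive(&mut client).await?, b"hello");
    server.await.unwrap()
}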

+ 27
- 0
binaries/daemon/Cargo.toml

@@ -0,0 +1,27 @@
[package]
name = "dora-daemon"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
eyre = "0.6.8"
tokio = { version = "1.20.1", features = ["full"] }
tokio-stream = { version = "0.1.11", features = ["net"] }
tracing = "0.1.36"
tracing-subscriber = "0.3.15"
futures-concurrency = "7.1.0"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.86"
dora-core = { path = "../../libraries/core" }
flume = "0.10.14"
dora-download = { path = "../../libraries/extensions/download" }
serde_yaml = "0.8.23"
uuid = { version = "1.1.2", features = ["v4"] }
futures = "0.3.25"
clap = { version = "3.1.8", features = ["derive"] }
shared-memory-server = { path = "../../libraries/shared-memory-server" }
ctrlc = "3.2.5"
bincode = "1.3.3"
async-trait = "0.1.64"

+ 114
- 0
binaries/daemon/src/coordinator.rs

@@ -0,0 +1,114 @@
use crate::{
tcp_utils::{tcp_receive, tcp_send},
DaemonCoordinatorEvent,
};
use dora_core::{
coordinator_messages::{CoordinatorRequest, DaemonEvent, RegisterResult},
daemon_messages::DaemonCoordinatorReply,
};
use eyre::{eyre, Context};
use std::{io::ErrorKind, net::SocketAddr};
use tokio::{
net::TcpStream,
sync::{mpsc, oneshot},
};
use tokio_stream::{wrappers::ReceiverStream, Stream};

#[derive(Debug)]
pub struct CoordinatorEvent {
pub event: DaemonCoordinatorEvent,
pub reply_tx: oneshot::Sender<DaemonCoordinatorReply>,
}

pub async fn register(
addr: SocketAddr,
machine_id: String,
) -> eyre::Result<impl Stream<Item = CoordinatorEvent>> {
let mut stream = TcpStream::connect(addr)
.await
.wrap_err("failed to connect to dora-coordinator")?;
stream
.set_nodelay(true)
.wrap_err("failed to set TCP_NODELAY")?;
let register = serde_json::to_vec(&CoordinatorRequest::Register { machine_id })?;
tcp_send(&mut stream, &register)
.await
.wrap_err("failed to send register request to dora-coordinator")?;
let reply_raw = tcp_receive(&mut stream)
.await
.wrap_err("failed to register reply from dora-coordinator")?;
let result: RegisterResult = serde_json::from_slice(&reply_raw)
.wrap_err("failed to deserialize dora-coordinator reply")?;
result.to_result()?;
tracing::info!("Connected to dora-coordinator at {:?}", addr);

let (tx, rx) = mpsc::channel(1);
tokio::spawn(async move {
loop {
let event = match tcp_receive(&mut stream).await {
Ok(raw) => match serde_json::from_slice(&raw) {
Ok(event) => event,
Err(err) => {
let err =
eyre!(err).wrap_err("failed to deserialize incoming coordinator event");
tracing::warn!("{err:?}");
continue;
}
},
Err(err) if err.kind() == ErrorKind::UnexpectedEof => break,
Err(err) => {
let err = eyre!(err).wrap_err("failed to receive incoming event");
tracing::warn!("{err:?}");
continue;
}
};
let (reply_tx, reply_rx) = oneshot::channel();
match tx.send(CoordinatorEvent { event, reply_tx }).await {
Ok(()) => {}
Err(_) => {
// receiving end of channel was closed
break;
}
}

let Ok(reply) = reply_rx.await else {
tracing::warn!("daemon sent no reply");
continue;
};
let serialized = match serde_json::to_vec(&reply)
.wrap_err("failed to serialize DaemonCoordinatorReply")
{
Ok(r) => r,
Err(err) => {
tracing::error!("{err:?}");
continue;
}
};
if let Err(err) = tcp_send(&mut stream, &serialized).await {
tracing::warn!("failed to send reply to coordinator: {err}");
continue;
};
}
});

Ok(ReceiverStream::new(rx))
}

pub async fn send_event(
addr: SocketAddr,
machine_id: String,
event: DaemonEvent,
) -> eyre::Result<TcpStream> {
let mut stream = TcpStream::connect(addr)
.await
.wrap_err("failed to connect to dora-coordinator")?;
stream
.set_nodelay(true)
.wrap_err("failed to set TCP_NODELAY")?;
let msg = serde_json::to_vec(&CoordinatorRequest::Event { machine_id, event })?;
tcp_send(&mut stream, &msg)
.await
.wrap_err("failed to send event to dora-coordinator")?;

Ok(stream)
}
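On the daemon side, the stream returned by `register` pairs every incoming coordinator event with a `oneshot` reply channel; dropping the channel without sending makes the reader task above log "daemon sent no reply". A minimal consumer sketch (hypothetical wiring, not code from this PR):

// Answer watchdogs, ignore everything else.
async fn consume(addr: std::net::SocketAddr) -> eyre::Result<()> {
    use tokio_stream::StreamExt;

    let mut events = Box::pin(register(addr, "machine-a".into()).await?);
    while let Some(CoordinatorEvent { event, reply_tx }) = events.next().await {
        let reply = match event {
            DaemonCoordinatorEvent::Watchdog => DaemonCoordinatorReply::WatchdogAck,
            other => {
                tracing::warn!("unhandled coordinator event: {other:?}");
                continue;
            }
        };
        let _ = reply_tx.send(reply);
    }
    Ok(())
}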

+ 974
- 0
binaries/daemon/src/lib.rs

@@ -0,0 +1,974 @@
use coordinator::CoordinatorEvent;
use dora_core::message::uhlc::HLC;
use dora_core::{
config::{DataId, InputMapping, NodeId},
coordinator_messages::DaemonEvent,
daemon_messages::{
self, DaemonCommunicationConfig, DaemonCoordinatorEvent, DaemonCoordinatorReply,
DaemonReply, DataflowId, DropToken, SpawnDataflowNodes,
},
descriptor::{CoreNodeKind, Descriptor, ResolvedNode},
};
use eyre::{bail, eyre, Context, ContextCompat};
use futures::{future, stream, FutureExt, TryFutureExt};
use futures_concurrency::stream::Merge;
use shared_mem_handler::SharedMemSample;
use std::{
borrow::Cow,
collections::{BTreeMap, BTreeSet, HashMap},
fmt, io,
net::SocketAddr,
path::{Path, PathBuf},
time::{Duration, Instant},
};
use tcp_utils::tcp_receive;
use tokio::{
fs,
sync::{mpsc, oneshot},
time::timeout,
};
use tokio_stream::{wrappers::ReceiverStream, Stream, StreamExt};
use uuid::Uuid;

mod coordinator;
mod listener;
mod shared_mem_handler;
mod spawn;
mod tcp_utils;

pub struct Daemon {
running: HashMap<DataflowId, RunningDataflow>,

events_tx: mpsc::Sender<Event>,

shared_memory_handler: flume::Sender<shared_mem_handler::DaemonEvent>,
shared_memory_handler_node: flume::Sender<shared_mem_handler::NodeEvent>,

coordinator_addr: Option<SocketAddr>,
machine_id: String,

/// used for testing and examples
exit_when_done: Option<BTreeSet<(Uuid, NodeId)>>,
/// used to record dataflow results when `exit_when_done` is used
dataflow_errors: Vec<(Uuid, NodeId, eyre::Report)>,

dora_runtime_path: Option<PathBuf>,
}

impl Daemon {
pub async fn run(
coordinator_addr: SocketAddr,
machine_id: String,
dora_runtime_path: Option<PathBuf>,
) -> eyre::Result<()> {
// connect to the coordinator
let coordinator_events = coordinator::register(coordinator_addr, machine_id.clone())
.await
.wrap_err("failed to connect to dora-coordinator")?
.map(Event::Coordinator);
Self::run_general(
coordinator_events,
Some(coordinator_addr),
machine_id,
None,
dora_runtime_path,
)
.await
.map(|_| ())
}

pub async fn run_dataflow(
dataflow_path: &Path,
dora_runtime_path: Option<PathBuf>,
) -> eyre::Result<()> {
let working_dir = dataflow_path
.canonicalize()
.context("failed to canoncialize dataflow path")?
.parent()
.ok_or_else(|| eyre::eyre!("canonicalized dataflow path has no parent"))?
.to_owned();

let descriptor = read_descriptor(dataflow_path).await?;
let nodes = descriptor.resolve_aliases();

let spawn_command = SpawnDataflowNodes {
dataflow_id: Uuid::new_v4(),
working_dir,
nodes,
daemon_communication: descriptor.daemon_config,
};

let exit_when_done = spawn_command
.nodes
.iter()
.map(|n| (spawn_command.dataflow_id, n.id.clone()))
.collect();
let (reply_tx, reply_rx) = oneshot::channel();
let coordinator_events = stream::once(async move {
Event::Coordinator(CoordinatorEvent {
event: DaemonCoordinatorEvent::Spawn(spawn_command),
reply_tx,
})
});
let run_result = Self::run_general(
Box::pin(coordinator_events),
None,
"".into(),
Some(exit_when_done),
dora_runtime_path,
);

let spawn_result = reply_rx
.map_err(|err| eyre!("failed to receive spawn result: {err}"))
.and_then(|r| async {
match r {
DaemonCoordinatorReply::SpawnResult(result) => result.map_err(|err| eyre!(err)),
_ => Err(eyre!("unexpected spawn reply")),
}
});

let (dataflow_errors, ()) = future::try_join(run_result, spawn_result).await?;

if dataflow_errors.is_empty() {
Ok(())
} else {
let mut output = "some nodes failed:".to_owned();
for (dataflow, node, error) in dataflow_errors {
use std::fmt::Write;
write!(&mut output, "\n - {dataflow}/{node}: {error}").unwrap();
}
bail!("{output}");
}
}
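`run_dataflow` is what makes the daemon usable standalone, e.g. for the examples: it injects a single synthetic `Spawn` event via a one-element stream and exits once every spawned node has finished. A minimal caller sketch (the path is a placeholder):

// Hypothetical example binary; assumes a dataflow.yml in the
// working directory and no separate dora-runtime path (None).
#[tokio::main]
async fn main() -> eyre::Result<()> {
    dora_daemon::Daemon::run_dataflow(std::path::Path::new("dataflow.yml"), None).await
}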

async fn run_general(
external_events: impl Stream<Item = Event> + Unpin,
coordinator_addr: Option<SocketAddr>,
machine_id: String,
exit_when_done: Option<BTreeSet<(Uuid, NodeId)>>,
dora_runtime_path: Option<PathBuf>,
) -> eyre::Result<Vec<(Uuid, NodeId, eyre::Report)>> {
let (dora_events_tx, dora_events_rx) = mpsc::channel(5);
let ctrlc_tx = dora_events_tx.clone();
let mut ctrlc_sent = false;
ctrlc::set_handler(move || {
if ctrlc_sent {
tracing::warn!("received second ctrlc signal -> aborting immediately");
std::process::abort();
} else {
tracing::info!("received ctrlc signal");
if ctrlc_tx.blocking_send(Event::CtrlC).is_err() {
tracing::error!("failed to report ctrl-c event to dora-daemon");
}
ctrlc_sent = true;
}
})
.wrap_err("failed to set ctrl-c handler")?;

let (shared_memory_handler, shared_memory_daemon_rx) = flume::unbounded();
let (shared_memory_handler_node, shared_memory_node_rx) = flume::bounded(10);
let daemon = Self {
running: HashMap::new(),
events_tx: dora_events_tx,
shared_memory_handler,
shared_memory_handler_node,
coordinator_addr,
machine_id,
exit_when_done,
dora_runtime_path,
dataflow_errors: Vec::new(),
};
let (shmem_events_tx, shmem_events_rx) = flume::bounded(5);
tokio::spawn(async {
let mut handler = shared_mem_handler::SharedMemHandler::new(shmem_events_tx);
handler
.run(shared_memory_node_rx, shared_memory_daemon_rx)
.await;
});
let dora_events = ReceiverStream::new(dora_events_rx);
let shmem_events = shmem_events_rx.into_stream().map(Event::ShmemHandler);
let watchdog_interval = tokio_stream::wrappers::IntervalStream::new(tokio::time::interval(
Duration::from_secs(5),
))
.map(|_| Event::WatchdogInterval);
let events = (
external_events,
dora_events,
shmem_events,
watchdog_interval,
)
.merge();
daemon.run_inner(events).await
}
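All event sources are mapped into the crate-level `Event` enum and merged into a single stream, so `run_inner` can handle everything in one loop. The `futures-concurrency` pattern in isolation (a standalone sketch, not daemon code):

use futures_concurrency::stream::Merge;
use tokio_stream::{wrappers::ReceiverStream, StreamExt};

#[derive(Debug)]
enum Ev {
    Tick,
    Msg(u32),
}

#[tokio::main]
async fn main() {
    let (tx, rx) = tokio::sync::mpsc::channel(4);
    tokio::spawn(async move {
        let _ = tx.send(7).await;
    });
    // both streams must yield the same item type before merging
    let msgs = ReceiverStream::new(rx).map(Ev::Msg);
    let ticks = tokio_stream::wrappers::IntervalStream::new(tokio::time::interval(
        std::time::Duration::from_millis(10),
    ))
    .map(|_| Ev::Tick);
    let mut events = (msgs, ticks).merge();
    while let Some(ev) = events.next().await {
        println!("{ev:?}");
        if matches!(ev, Ev::Msg(_)) {
            break; // stop once the message arrives
        }
    }
}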

async fn run_inner(
mut self,
incoming_events: impl Stream<Item = Event> + Unpin,
) -> eyre::Result<Vec<(Uuid, NodeId, eyre::Report)>> {
let mut events = incoming_events;

while let Some(event) = events.next().await {
let start = Instant::now();

match event {
Event::Coordinator(CoordinatorEvent { event, reply_tx }) => {
let (reply, status) = self.handle_coordinator_event(event).await;
let _ = reply_tx.send(reply);
match status {
RunStatus::Continue => {}
RunStatus::Exit => break,
}
}
Event::Node {
dataflow_id: dataflow,
node_id,
event,
reply_sender,
} => {
self.handle_node_event(event, dataflow, node_id, reply_sender)
.await?
}
Event::Dora(event) => match self.handle_dora_event(event).await? {
RunStatus::Continue => {}
RunStatus::Exit => break,
},
Event::ShmemHandler(event) => self.handle_shmem_handler_event(event).await?,
Event::WatchdogInterval => {
if let Some(addr) = self.coordinator_addr {
let mut connection = coordinator::send_event(
addr,
self.machine_id.clone(),
DaemonEvent::Watchdog,
)
.await
.wrap_err("lost connection to coordinator")?;
let reply_raw = tcp_receive(&mut connection)
.await
.wrap_err("lost connection to coordinator")?;
let _: dora_core::coordinator_messages::WatchdogAck =
serde_json::from_slice(&reply_raw)
.wrap_err("received unexpected watchdog reply from coordinator")?;
}
}
Event::CtrlC => {
for dataflow in self.running.values_mut() {
dataflow.stop_all().await;
}
}
}

let elapsed = start.elapsed();
// if elapsed.as_micros() > 10 {
// tracing::debug!("handled event in {elapsed:?}: {event_debug}");
// }
}

Ok(self.dataflow_errors)
}

async fn handle_coordinator_event(
&mut self,
event: DaemonCoordinatorEvent,
) -> (DaemonCoordinatorReply, RunStatus) {
match event {
DaemonCoordinatorEvent::Spawn(SpawnDataflowNodes {
dataflow_id,
working_dir,
nodes,
daemon_communication,
}) => {
let result = self
.spawn_dataflow(dataflow_id, working_dir, nodes, daemon_communication)
.await;
if let Err(err) = &result {
tracing::error!("{err:?}");
}
let reply =
DaemonCoordinatorReply::SpawnResult(result.map_err(|err| format!("{err:?}")));
(reply, RunStatus::Continue)
}
DaemonCoordinatorEvent::StopDataflow { dataflow_id } => {
let stop = async {
let dataflow = self
.running
.get_mut(&dataflow_id)
.wrap_err_with(|| format!("no running dataflow with ID `{dataflow_id}`"))?;
dataflow.stop_all().await;
Result::<(), eyre::Report>::Ok(())
};
let reply = DaemonCoordinatorReply::StopResult(
stop.await.map_err(|err| format!("{err:?}")),
);
(reply, RunStatus::Continue)
}
DaemonCoordinatorEvent::Destroy => {
tracing::info!("received destroy command -> exiting");
let reply = DaemonCoordinatorReply::DestroyResult(Ok(()));
(reply, RunStatus::Exit)
}
DaemonCoordinatorEvent::Watchdog => {
(DaemonCoordinatorReply::WatchdogAck, RunStatus::Continue)
}
}
}

async fn spawn_dataflow(
&mut self,
dataflow_id: uuid::Uuid,
working_dir: PathBuf,
nodes: Vec<ResolvedNode>,
daemon_communication_config: DaemonCommunicationConfig,
) -> eyre::Result<()> {
let dataflow = match self.running.entry(dataflow_id) {
std::collections::hash_map::Entry::Vacant(entry) => entry.insert(Default::default()),
std::collections::hash_map::Entry::Occupied(_) => {
bail!("there is already a running dataflow with ID `{dataflow_id}`")
}
};
for node in nodes {
dataflow.running_nodes.insert(node.id.clone());
let inputs = node_inputs(&node);

for (input_id, mapping) in inputs {
dataflow
.open_inputs
.entry(node.id.clone())
.or_default()
.insert(input_id.clone());
match mapping {
InputMapping::User(mapping) => {
dataflow
.mappings
.entry((mapping.source, mapping.output))
.or_default()
.insert((node.id.clone(), input_id));
}
InputMapping::Timer { interval } => {
dataflow
.timers
.entry(interval)
.or_default()
.insert((node.id.clone(), input_id));
}
}
}

let node_id = node.id.clone();
spawn::spawn_node(
dataflow_id,
&working_dir,
node,
self.events_tx.clone(),
self.shared_memory_handler_node.clone(),
daemon_communication_config,
self.dora_runtime_path.as_deref(),
)
.await
.wrap_err_with(|| format!("failed to spawn node `{node_id}`"))?;
}
for interval in dataflow.timers.keys().copied() {
let events_tx = self.events_tx.clone();
let task = async move {
let mut interval_stream = tokio::time::interval(interval);
let hlc = HLC::default();
loop {
interval_stream.tick().await;

let event = DoraEvent::Timer {
dataflow_id,
interval,
metadata: dora_core::message::Metadata::from_parameters(
hlc.new_timestamp(),
Default::default(),
),
};
if events_tx.send(event.into()).await.is_err() {
break;
}
}
};
let (task, handle) = task.remote_handle();
tokio::spawn(task);
dataflow._timer_handles.push(handle);
}
Ok(())
}

async fn handle_node_event(
&mut self,
event: DaemonNodeEvent,
dataflow_id: DataflowId,
node_id: NodeId,
reply_sender: oneshot::Sender<DaemonReply>,
) -> eyre::Result<()> {
match event {
DaemonNodeEvent::Subscribe { event_sender } => {
let result = self.subscribe(dataflow_id, node_id, event_sender).await;
let _ = reply_sender.send(DaemonReply::Result(result));
}
DaemonNodeEvent::CloseOutputs(outputs) => {
// notify downstream nodes
let inner = async {
let dataflow = self
.running
.get_mut(&dataflow_id)
.wrap_err_with(|| format!("failed to get downstream nodes: no running dataflow with ID `{dataflow_id}`"))?;
send_input_closed_events(dataflow, |(source_id, output_id)| {
source_id == &node_id && outputs.contains(output_id)
})
.await;
Result::<_, eyre::Error>::Ok(())
};

let reply = inner.await.map_err(|err| format!("{err:?}"));
let _ = reply_sender.send(DaemonReply::Result(reply));
// TODO: notify remote nodes
}
DaemonNodeEvent::Stopped => {
tracing::info!("Stopped: {dataflow_id}/{node_id}");

let _ = reply_sender.send(DaemonReply::Result(Ok(())));

self.handle_node_stop(dataflow_id, &node_id).await?;
}
}
Ok(())
}

async fn subscribe(
&mut self,
dataflow_id: Uuid,
node_id: NodeId,
event_sender: flume::Sender<daemon_messages::NodeEvent>,
) -> Result<(), String> {
let dataflow = self.running.get_mut(&dataflow_id).ok_or_else(|| {
format!("subscribe failed: no running dataflow with ID `{dataflow_id}`")
})?;

// some inputs might have been closed already -> report those events
let closed_inputs = dataflow
.mappings
.values()
.flatten()
.filter(|(node, _)| node == &node_id)
.map(|(_, input)| input)
.filter(|input| {
dataflow
.open_inputs
.get(&node_id)
.map(|open_inputs| !open_inputs.contains(*input))
.unwrap_or(true)
});
for input_id in closed_inputs {
let _ = event_sender
.send_async(daemon_messages::NodeEvent::InputClosed {
id: input_id.clone(),
})
.await;
}

// if a stop event was already sent for the dataflow, send it to
// the newly connected node too
if dataflow.stop_sent {
let _ = event_sender
.send_async(daemon_messages::NodeEvent::Stop)
.await;
}

if dataflow.stop_sent || dataflow.open_inputs(&node_id).is_empty() {
tracing::debug!("Received subscribe message for closed event stream");
} else {
dataflow.subscribe_channels.insert(node_id, event_sender);
}

Ok(())
}

#[tracing::instrument(skip(self))]
async fn handle_node_stop(
&mut self,
dataflow_id: Uuid,
node_id: &NodeId,
) -> Result<(), eyre::ErrReport> {
let dataflow = self.running.get_mut(&dataflow_id).wrap_err_with(|| {
format!("failed to get downstream nodes: no running dataflow with ID `{dataflow_id}`")
})?;
send_input_closed_events(dataflow, |(source_id, _)| source_id == node_id).await;
dataflow.running_nodes.remove(node_id);
if dataflow.running_nodes.is_empty() {
tracing::info!(
"Dataflow `{dataflow_id}` finished on machine `{}`",
self.machine_id
);
if let Some(addr) = self.coordinator_addr {
if coordinator::send_event(
addr,
self.machine_id.clone(),
DaemonEvent::AllNodesFinished {
dataflow_id,
result: Ok(()),
},
)
.await
.is_err()
{
tracing::warn!("failed to report dataflow finish to coordinator");
}
}
self.running.remove(&dataflow_id);
}
Ok(())
}

async fn handle_dora_event(&mut self, event: DoraEvent) -> eyre::Result<RunStatus> {
match event {
DoraEvent::Timer {
dataflow_id,
interval,
metadata,
} => {
let Some(dataflow) = self.running.get_mut(&dataflow_id) else {
tracing::warn!("Timer event for unknown dataflow `{dataflow_id}`");
return Ok(RunStatus::Continue);
};

let Some(subscribers) = dataflow.timers.get(&interval) else {
return Ok(RunStatus::Continue);
};

let mut closed = Vec::new();
for (receiver_id, input_id) in subscribers {
let Some(channel) = dataflow.subscribe_channels.get(receiver_id) else {
continue;
};

let send_result = channel.send_async(daemon_messages::NodeEvent::Input {
id: input_id.clone(),
metadata: metadata.clone(),
data: None,
});
match timeout(Duration::from_millis(1), send_result).await {
Ok(Ok(())) => {}
Ok(Err(_)) => {
closed.push(receiver_id);
}
Err(_) => {
tracing::info!(
"dropping timer tick event for `{receiver_id}` (send timeout)"
);
}
}
}
for id in closed {
dataflow.subscribe_channels.remove(id);
}
}
DoraEvent::SpawnedNodeResult {
dataflow_id,
node_id,
exit_status,
} => {
let mut signal_exit = false;
let node_error = match exit_status {
NodeExitStatus::Success => {
tracing::info!("node {dataflow_id}/{node_id} finished successfully");
None
}
NodeExitStatus::IoError(err) => {
let err = eyre!(err).wrap_err(format!(
"I/O error while waiting for node `{dataflow_id}/{node_id}`"
));
tracing::error!("{err:?}");
Some(err)
}
NodeExitStatus::ExitCode(code) => {
let err =
eyre!("node {dataflow_id}/{node_id} finished with exit code {code}");
tracing::warn!("{err}");
Some(err)
}
NodeExitStatus::Signal(signal) => {
signal_exit = true;
let signal: Cow<_> = match signal {
1 => "SIGHUP".into(),
2 => "SIGINT".into(),
3 => "SIGQUIT".into(),
4 => "SIGILL".into(),
6 => "SIGABRT".into(),
8 => "SIGFPE".into(),
9 => "SIGKILL".into(),
11 => "SIGSEGV".into(),
13 => "SIGPIPE".into(),
14 => "SIGALRM".into(),
15 => "SIGTERM".into(),
22 => "SIGABRT".into(),
23 => "NSIG".into(),

other => other.to_string().into(),
};
let err = eyre!(
"node {dataflow_id}/{node_id} finished because of signal `{signal}`"
);
tracing::warn!("{err}");
Some(err)
}
NodeExitStatus::Unknown => {
let err =
eyre!("node {dataflow_id}/{node_id} finished with unknown exit code");
tracing::warn!("{err}");
Some(err)
}
};

if self
.running
.get(&dataflow_id)
.and_then(|d| d.running_nodes.get(&node_id))
.is_some()
{
if !signal_exit {
tracing::warn!(
"node `{dataflow_id}/{node_id}` finished without sending `Stopped` message"
);
}
self.handle_node_stop(dataflow_id, &node_id).await?;
}

if let Some(exit_when_done) = &mut self.exit_when_done {
if let Some(err) = node_error {
self.dataflow_errors
.push((dataflow_id, node_id.clone(), err));
}
exit_when_done.remove(&(dataflow_id, node_id));
if exit_when_done.is_empty() {
tracing::info!(
"exiting daemon because all required dataflows are finished"
);
return Ok(RunStatus::Exit);
}
}
}
}
Ok(RunStatus::Continue)
}

async fn handle_shmem_handler_event(&mut self, event: ShmemHandlerEvent) -> eyre::Result<()> {
match event {
ShmemHandlerEvent::SendOut {
dataflow_id,
node_id,
output_id,
metadata,
data,
} => {
let dataflow = self.running.get_mut(&dataflow_id).wrap_err_with(|| {
format!("send out failed: no running dataflow with ID `{dataflow_id}`")
})?;

tracing::trace!(
"Time between prepare and send out: {:?}",
metadata
.timestamp()
.get_time()
.to_system_time()
.elapsed()
.unwrap()
);

// figure out receivers from dataflow graph
let empty_set = BTreeSet::new();
let local_receivers = dataflow
.mappings
.get(&(node_id, output_id))
.unwrap_or(&empty_set);

// send shared memory ID to all local receivers
let mut closed = Vec::new();
let mut drop_tokens = Vec::new();
for (receiver_id, input_id) in local_receivers {
if let Some(channel) = dataflow.subscribe_channels.get(receiver_id) {
let drop_token = DropToken::generate();
let send_result = channel.send_async(daemon_messages::NodeEvent::Input {
id: input_id.clone(),
metadata: metadata.clone(),
data: data.as_ref().map(|data| daemon_messages::InputData {
shared_memory_id: data.get_os_id().to_owned(),
len: data.len(),
drop_token: drop_token.clone(),
}),
});

match timeout(Duration::from_millis(10), send_result).await {
Ok(Ok(())) => {
drop_tokens.push(drop_token);
}
Ok(Err(_)) => {
closed.push(receiver_id);
}
Err(_) => {
tracing::warn!(
"dropping input event `{receiver_id}/{input_id}` (send timeout)"
);
}
}
}
}
for id in closed {
dataflow.subscribe_channels.remove(id);
}
let data_bytes = data.as_ref().map(|d| unsafe { d.as_slice() }.to_owned());

// report drop tokens to shared memory handler
if let Some(data) = data {
if let Err(err) = self
.shared_memory_handler
.send_async(shared_mem_handler::DaemonEvent::SentOut {
data: *data,
drop_tokens,
})
.await
.wrap_err("shared mem handler crashed after send out")
{
tracing::error!("{err:?}");
}
}

// TODO send `data` via network to all remote receivers
if let Some(data) = data_bytes {}
}
ShmemHandlerEvent::HandlerError(err) => {
bail!(err.wrap_err("shared memory handler failed"))
}
}

Ok(())
}
}
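The drop tokens generated in `SendOut` are how the daemon learns when a shared-memory sample can be reclaimed: every local receiver gets a token alongside its input, the node returns tokens with its next `NextEvent` request, and the shared-memory handler frees the region once all of them are back. A compressed illustration of that bookkeeping (simplified, with invented types; the real logic lives in shared_mem_handler.rs):

use std::collections::HashMap;

// One entry per outstanding sample; the region handle would live here too.
struct PendingSample {
    remaining_receivers: usize,
}

fn on_drop_token(pending: &mut HashMap<String, PendingSample>, token: &str) {
    if let Some(sample) = pending.get_mut(token) {
        sample.remaining_receivers -= 1;
        if sample.remaining_receivers == 0 {
            // last receiver is done, drop the entry (and with it the region)
            pending.remove(token);
        }
    }
}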

fn node_inputs(node: &ResolvedNode) -> BTreeMap<DataId, InputMapping> {
match &node.kind {
CoreNodeKind::Custom(n) => n.run_config.inputs.clone(),
CoreNodeKind::Runtime(n) => runtime_node_inputs(n),
}
}

fn runtime_node_inputs(n: &dora_core::descriptor::RuntimeNode) -> BTreeMap<DataId, InputMapping> {
n.operators
.iter()
.flat_map(|operator| {
operator.config.inputs.iter().map(|(input_id, mapping)| {
(
DataId::from(format!("{}/{input_id}", operator.id)),
mapping.clone(),
)
})
})
.collect()
}

fn runtime_node_outputs(n: &dora_core::descriptor::RuntimeNode) -> BTreeSet<DataId> {
n.operators
.iter()
.flat_map(|operator| {
operator
.config
.outputs
.iter()
.map(|output_id| DataId::from(format!("{}/{output_id}", operator.id)))
})
.collect()
}
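Operator inputs and outputs are namespaced under their operator ID, so at the daemon level a runtime node's channels look like `<operator>/<name>`. For instance (hypothetical IDs):

// An operator `object_detector` with input `image` is addressed as:
let input_id = DataId::from(format!("{}/{}", "object_detector", "image"));
// yields the node-level input id "object_detector/image"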

async fn send_input_closed_events<F>(dataflow: &mut RunningDataflow, mut filter: F)
where
F: FnMut(&(NodeId, DataId)) -> bool,
{
let downstream_nodes: BTreeSet<_> = dataflow
.mappings
.iter()
.filter(|(k, _)| filter(k))
.flat_map(|(_, v)| v)
.collect();
for (receiver_id, input_id) in downstream_nodes {
if let Some(channel) = dataflow.subscribe_channels.get(receiver_id) {
let _ = channel
.send_async(daemon_messages::NodeEvent::InputClosed {
id: input_id.clone(),
})
.await;
};

if let Some(open_inputs) = dataflow.open_inputs.get_mut(receiver_id) {
open_inputs.remove(input_id);
if open_inputs.is_empty() {
// close the subscriber channel
dataflow.subscribe_channels.remove(receiver_id);
}
}
}
}

#[derive(Default)]
pub struct RunningDataflow {
subscribe_channels: HashMap<NodeId, flume::Sender<daemon_messages::NodeEvent>>,
mappings: HashMap<OutputId, BTreeSet<InputId>>,
timers: BTreeMap<Duration, BTreeSet<InputId>>,
open_inputs: BTreeMap<NodeId, BTreeSet<DataId>>,
running_nodes: BTreeSet<NodeId>,
/// Keep handles to all timer tasks of this dataflow to cancel them on drop.
_timer_handles: Vec<futures::future::RemoteHandle<()>>,
stop_sent: bool,

/// Used in `open_inputs`.
///
/// TODO: replace this with a constant once `BTreeSet::new` is `const` on stable.
empty_set: BTreeSet<DataId>,
}

impl RunningDataflow {
async fn stop_all(&mut self) {
for (_node_id, channel) in self.subscribe_channels.drain() {
let _ = channel.send_async(daemon_messages::NodeEvent::Stop).await;
}
self.stop_sent = true;
}

fn open_inputs(&self, node_id: &NodeId) -> &BTreeSet<DataId> {
self.open_inputs.get(node_id).unwrap_or(&self.empty_set)
}
}

type OutputId = (NodeId, DataId);
type InputId = (NodeId, DataId);

#[derive(Debug)]
pub enum Event {
Node {
dataflow_id: DataflowId,
node_id: NodeId,
event: DaemonNodeEvent,
reply_sender: oneshot::Sender<DaemonReply>,
},
Coordinator(CoordinatorEvent),
Dora(DoraEvent),
ShmemHandler(ShmemHandlerEvent),
WatchdogInterval,
CtrlC,
}

impl From<DoraEvent> for Event {
fn from(event: DoraEvent) -> Self {
Event::Dora(event)
}
}
impl From<ShmemHandlerEvent> for Event {
fn from(event: ShmemHandlerEvent) -> Self {
Event::ShmemHandler(event)
}
}

#[derive(Debug)]
pub enum DaemonNodeEvent {
Stopped,
Subscribe {
event_sender: flume::Sender<daemon_messages::NodeEvent>,
},
CloseOutputs(Vec<dora_core::config::DataId>),
}

pub enum ShmemHandlerEvent {
SendOut {
dataflow_id: DataflowId,
node_id: NodeId,
output_id: DataId,
metadata: dora_core::message::Metadata<'static>,
data: Option<Box<SharedMemSample>>,
},
HandlerError(eyre::ErrReport),
}

impl fmt::Debug for ShmemHandlerEvent {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::SendOut {
dataflow_id,
node_id,
output_id,
metadata,
data,
} => f
.debug_struct("SendOut")
.field("dataflow_id", dataflow_id)
.field("node_id", node_id)
.field("output_id", output_id)
.field("metadata", metadata)
.field("data", &data.as_ref().map(|_| "Some(..)").unwrap_or("None"))
.finish(),
ShmemHandlerEvent::HandlerError(err) => {
f.debug_tuple("HandlerError").field(err).finish()
}
}
}
}

#[derive(Debug)]
pub enum DoraEvent {
Timer {
dataflow_id: DataflowId,
interval: Duration,
metadata: dora_core::message::Metadata<'static>,
},
SpawnedNodeResult {
dataflow_id: DataflowId,
node_id: NodeId,
exit_status: NodeExitStatus,
},
}

#[derive(Debug)]
pub enum NodeExitStatus {
Success,
IoError(io::Error),
ExitCode(i32),
Signal(i32),
Unknown,
}

impl From<Result<std::process::ExitStatus, io::Error>> for NodeExitStatus {
fn from(result: Result<std::process::ExitStatus, io::Error>) -> Self {
match result {
Ok(status) => {
if status.success() {
NodeExitStatus::Success
} else if let Some(code) = status.code() {
Self::ExitCode(code)
} else {
#[cfg(unix)]
{
use std::os::unix::process::ExitStatusExt;
if let Some(signal) = status.signal() {
return Self::Signal(signal);
}
}
Self::Unknown
}
}
Err(err) => Self::IoError(err),
}
}
}
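This `From` impl lets the spawn task convert the raw `wait()` result in one step and hand it to the main loop. A sketch of the call site (assumed wiring; the actual code lives in spawn.rs):

// `child` is a tokio::process::Child spawned for this node.
let exit_status = NodeExitStatus::from(child.wait().await);
let event = DoraEvent::SpawnedNodeResult {
    dataflow_id,
    node_id,
    exit_status,
};
let _ = events_tx.send(event.into()).await; // uses From<DoraEvent> for Event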

type MessageId = String;

#[must_use]
enum RunStatus {
Continue,
Exit,
}

pub async fn read_descriptor(file: &Path) -> eyre::Result<Descriptor> {
let descriptor_file = fs::read(file).await.wrap_err("failed to open given file")?;
let descriptor: Descriptor =
serde_yaml::from_slice(&descriptor_file).context("failed to parse given descriptor")?;
Ok(descriptor)
}

+ 419
- 0
binaries/daemon/src/listener/mod.rs

@@ -0,0 +1,419 @@
use crate::{shared_mem_handler, DaemonNodeEvent, Event};
use dora_core::{
config::NodeId,
daemon_messages::{
DaemonCommunication, DaemonCommunicationConfig, DaemonReply, DaemonRequest, DataflowId,
DropEvent, NodeEvent,
},
};
use eyre::{eyre, Context};
use shared_memory_server::{ShmemConf, ShmemServer};
use std::{collections::VecDeque, net::Ipv4Addr};
use tokio::{
net::TcpListener,
sync::{mpsc, oneshot},
};

// TODO unify and avoid duplication;
pub mod shmem;
pub mod tcp;

pub async fn spawn_listener_loop(
dataflow_id: &DataflowId,
node_id: &NodeId,
daemon_tx: &mpsc::Sender<Event>,
shmem_handler_tx: &flume::Sender<shared_mem_handler::NodeEvent>,
config: DaemonCommunicationConfig,
) -> eyre::Result<DaemonCommunication> {
match config {
DaemonCommunicationConfig::Tcp => {
let localhost = Ipv4Addr::new(127, 0, 0, 1);
let socket = match TcpListener::bind((localhost, 0)).await {
Ok(socket) => socket,
Err(err) => {
return Err(
eyre::Report::new(err).wrap_err("failed to create local TCP listener")
)
}
};
let socket_addr = socket
.local_addr()
.wrap_err("failed to get local addr of socket")?;

let event_loop_node_id = format!("{dataflow_id}/{node_id}");
let daemon_tx = daemon_tx.clone();
let shmem_handler_tx = shmem_handler_tx.clone();
tokio::spawn(async move {
tcp::listener_loop(socket, daemon_tx, shmem_handler_tx).await;
tracing::debug!("event listener loop finished for `{event_loop_node_id}`");
});

Ok(DaemonCommunication::Tcp { socket_addr })
}
DaemonCommunicationConfig::Shmem => {
let daemon_control_region = ShmemConf::new()
.size(4096)
.create()
.wrap_err("failed to allocate daemon_control_region")?;
let daemon_events_region = ShmemConf::new()
.size(4096)
.create()
.wrap_err("failed to allocate daemon_events_region")?;
let daemon_control_region_id = daemon_control_region.get_os_id().to_owned();
let daemon_events_region_id = daemon_events_region.get_os_id().to_owned();

{
let server = unsafe { ShmemServer::new(daemon_control_region) }
.wrap_err("failed to create control server")?;
let daemon_tx = daemon_tx.clone();
let shmem_handler_tx = shmem_handler_tx.clone();
tokio::spawn(shmem::listener_loop(server, daemon_tx, shmem_handler_tx));
}

{
let server = unsafe { ShmemServer::new(daemon_events_region) }
.wrap_err("failed to create events server")?;
let event_loop_node_id = format!("{dataflow_id}/{node_id}");
let daemon_tx = daemon_tx.clone();
let shmem_handler_tx = shmem_handler_tx.clone();
tokio::task::spawn(async move {
shmem::listener_loop(server, daemon_tx, shmem_handler_tx).await;
tracing::debug!("event listener loop finished for `{event_loop_node_id}`");
});
}

Ok(DaemonCommunication::Shmem {
daemon_control_region_id,
daemon_events_region_id,
})
}
}
}
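Whichever transport is chosen, the returned `DaemonCommunication` value is passed to the spawned node, which connects back and registers before anything else. A sketch of the node side for the TCP variant (assumed wiring; the real client lives in the node API, and frames are bincode-encoded with the same length-prefixed framing as tcp_utils.rs):

async fn connect_to_daemon(
    socket_addr: std::net::SocketAddr,
    dataflow_id: DataflowId,
    node_id: NodeId,
) -> eyre::Result<tokio::net::TcpStream> {
    let mut stream = tokio::net::TcpStream::connect(socket_addr).await?;
    let request = bincode::serialize(&DaemonRequest::Register { dataflow_id, node_id })?;
    tcp_send(&mut stream, &request).await?; // same framing as tcp_utils.rs
    let raw = tcp_receive(&mut stream).await?;
    match bincode::deserialize(&raw)? {
        DaemonReply::Result(Ok(())) => Ok(stream),
        other => eyre::bail!("unexpected register reply: {other:?}"),
    }
}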

struct Listener<C> {
dataflow_id: DataflowId,
node_id: NodeId,
daemon_tx: mpsc::Sender<Event>,
shmem_handler_tx: flume::Sender<shared_mem_handler::NodeEvent>,
subscribed_events: Option<flume::Receiver<NodeEvent>>,
max_queue_len: usize,
queue: VecDeque<NodeEvent>,
connection: C,
}

impl<C> Listener<C>
where
C: Connection,
{
pub(crate) async fn run(
mut connection: C,
daemon_tx: mpsc::Sender<Event>,
shmem_handler_tx: flume::Sender<shared_mem_handler::NodeEvent>,
) {
// receive the first message
let message = match connection
.receive_message()
.await
.wrap_err("failed to receive register message")
{
Ok(Some(m)) => m,
Ok(None) => {
tracing::info!("channel disconnected before register message");
return;
} // disconnected
Err(err) => {
tracing::info!("{err:?}");
return;
}
};

match message {
DaemonRequest::Register {
dataflow_id,
node_id,
} => {
let reply = DaemonReply::Result(Ok(()));
match connection
.send_reply(reply)
.await
.wrap_err("failed to send register reply")
{
Ok(()) => {
let mut listener = Listener {
dataflow_id,
node_id,
connection,
daemon_tx,
shmem_handler_tx,
subscribed_events: None,
max_queue_len: 10, // TODO: make this configurable
queue: VecDeque::new(),
};
match listener.run_inner().await.wrap_err("listener failed") {
Ok(()) => {}
Err(err) => tracing::error!("{err:?}"),
}
}
Err(err) => {
tracing::warn!("{err:?}");
}
}
}
other => {
tracing::warn!("expected register message, got `{other:?}`");
let reply = DaemonReply::Result(Err("must send register message first".into()));
if let Err(err) = connection
.send_reply(reply)
.await
.wrap_err("failed to send reply")
{
tracing::warn!("{err:?}");
}
}
}
}

async fn run_inner(&mut self) -> eyre::Result<()> {
loop {
// receive the next node message
let message = match self
.connection
.receive_message()
.await
.wrap_err("failed to receive DaemonRequest")
{
Ok(Some(m)) => m,
Ok(None) => {
tracing::debug!(
"channel disconnected: {}/{}",
self.dataflow_id,
self.node_id
);
break;
} // disconnected
Err(err) => {
tracing::warn!("{err:?}");
continue;
}
};

// handle incoming events
self.handle_events().await?;

self.handle_message(message).await?;
}
Ok(())
}

async fn handle_events(&mut self) -> eyre::Result<()> {
if let Some(events) = &mut self.subscribed_events {
while let Ok(event) = events.try_recv() {
self.queue.push_back(event);
}

// drop oldest input events to keep the queue at its max length
let input_event_count = self
.queue
.iter()
.filter(|e| matches!(e, NodeEvent::Input { .. }))
.count();
let drop_n = input_event_count.saturating_sub(self.max_queue_len);
if drop_n > 0 {
self.drop_oldest_inputs(drop_n).await?;
}
}
Ok(())
}

async fn drop_oldest_inputs(&mut self, number: usize) -> Result<(), eyre::ErrReport> {
tracing::debug!("dropping {number} inputs because event queue is too full");
let mut drop_tokens = Vec::new();
for i in 0..number {
// find index of oldest input event
let index = self
.queue
.iter()
.position(|e| matches!(e, NodeEvent::Input { .. }))
.unwrap_or_else(|| panic!("no input event found in drop iteration {i}"));

// remove that event
if let Some(NodeEvent::Input {
data: Some(data), ..
}) = self.queue.remove(index)
{
drop_tokens.push(data.drop_token);
}
}
self.report_drop_tokens(drop_tokens).await?;
Ok(())
}

#[tracing::instrument(skip(self), fields(%self.dataflow_id, %self.node_id))]
async fn handle_message(&mut self, message: DaemonRequest) -> eyre::Result<()> {
match message {
DaemonRequest::Register { .. } => {
let reply = DaemonReply::Result(Err("unexpected register message".into()));
self.send_reply(reply)
.await
.wrap_err("failed to send register reply")?;
}
DaemonRequest::Stopped => self.process_daemon_event(DaemonNodeEvent::Stopped).await?,
DaemonRequest::CloseOutputs(outputs) => {
self.process_daemon_event(DaemonNodeEvent::CloseOutputs(outputs))
.await?
}
DaemonRequest::PrepareOutputMessage {
output_id,
metadata,
data_len,
} => {
let (reply_sender, reply) = oneshot::channel();
let event = shared_mem_handler::NodeEvent::PrepareOutputMessage {
dataflow_id: self.dataflow_id,
node_id: self.node_id.clone(),
output_id,
metadata,
data_len,
reply_sender,
};
self.send_shared_memory_event(event).await?;
let reply = reply
.await
.wrap_err("failed to receive prepare output reply")?;
// tracing::debug!("prepare latency: {:?}", start.elapsed()?);
self.send_reply(reply)
.await
.wrap_err("failed to send PrepareOutputMessage reply")?;
}
DaemonRequest::SendPreparedMessage { id } => {
let (reply_sender, reply) = oneshot::channel();
let event = shared_mem_handler::NodeEvent::SendPreparedMessage { id, reply_sender };
self.send_shared_memory_event(event).await?;
self.send_reply(
reply
.await
.wrap_err("failed to receive SendPreparedMessage reply")?,
)
.await?;
}
DaemonRequest::SendEmptyMessage {
output_id,
metadata,
} => {
// let elapsed = metadata.timestamp().get_time().to_system_time().elapsed()?;
// tracing::debug!("listener SendEmptyMessage: {elapsed:?}");
let event = crate::Event::ShmemHandler(crate::ShmemHandlerEvent::SendOut {
dataflow_id: self.dataflow_id,
node_id: self.node_id.clone(),
output_id,
metadata,
data: None,
});
let result = self
.send_daemon_event(event)
.await
.map_err(|_| "failed to receive send_empty_message reply".to_owned());
self.send_reply(DaemonReply::Result(result))
.await
.wrap_err("failed to send SendEmptyMessage reply")?;
}
DaemonRequest::Subscribe => {
let (tx, rx) = flume::bounded(100);
self.process_daemon_event(DaemonNodeEvent::Subscribe { event_sender: tx })
.await?;
self.subscribed_events = Some(rx);
}
DaemonRequest::NextEvent { drop_tokens } => {
self.report_drop_tokens(drop_tokens).await?;

// try to take the latest queued event first
let queued_event = self.queue.pop_front().map(DaemonReply::NodeEvent);
let reply = match queued_event {
Some(reply) => reply,
None => {
match self.subscribed_events.as_mut() {
// wait for next event
Some(events) => match events.recv_async().await {
Ok(event) => DaemonReply::NodeEvent(event),
Err(flume::RecvError::Disconnected) => DaemonReply::Closed,
},
None => DaemonReply::Result(Err(
"Ignoring event request because no subscribe \
message was sent yet"
.into(),
)),
}
}
};

self.send_reply(reply)
.await
.wrap_err("failed to send NextEvent reply")?;
}
}
Ok(())
}

async fn report_drop_tokens(
&mut self,
drop_tokens: Vec<dora_core::daemon_messages::DropToken>,
) -> eyre::Result<()> {
if !drop_tokens.is_empty() {
let drop_event = shared_mem_handler::NodeEvent::Drop(DropEvent {
tokens: drop_tokens,
});
self.send_shared_memory_event(drop_event).await?;
}
Ok(())
}

async fn process_daemon_event(&mut self, event: DaemonNodeEvent) -> eyre::Result<()> {
// send NodeEvent to daemon main loop
let (reply_tx, reply) = oneshot::channel();
let event = Event::Node {
dataflow_id: self.dataflow_id,
node_id: self.node_id.clone(),
event,
reply_sender: reply_tx,
};
self.daemon_tx
.send(event)
.await
.map_err(|_| eyre!("failed to send event to daemon"))?;
let reply = reply
.await
.map_err(|_| eyre!("failed to receive reply from daemon"))?;
self.send_reply(reply).await?;
Ok(())
}

async fn send_reply(&mut self, reply: DaemonReply) -> eyre::Result<()> {
self.connection
.send_reply(reply)
.await
.wrap_err_with(|| format!("failed to send reply to node `{}`", self.node_id))
}

async fn send_shared_memory_event(
&self,
event: shared_mem_handler::NodeEvent,
) -> eyre::Result<()> {
self.shmem_handler_tx
.send_async(event)
.await
.map_err(|_| eyre!("failed to send event to shared_mem_handler"))
}

async fn send_daemon_event(&self, event: crate::Event) -> eyre::Result<()> {
self.daemon_tx
.send(event)
.await
.map_err(|_| eyre!("failed to send event to daemon"))
}
}

#[async_trait::async_trait]
trait Connection {
async fn receive_message(&mut self) -> eyre::Result<Option<DaemonRequest>>;
async fn send_reply(&mut self, message: DaemonReply) -> eyre::Result<()>;
}
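Keeping `Connection` as the seam between transport and protocol also makes `Listener` testable without any real transport. A hypothetical in-memory implementation for unit tests (`ChannelConnection` is an invented name):

// Test-only sketch: drive a Listener through flume channels.
struct ChannelConnection {
    requests: flume::Receiver<DaemonRequest>,
    replies: flume::Sender<DaemonReply>,
}

#[async_trait::async_trait]
impl Connection for ChannelConnection {
    async fn receive_message(&mut self) -> eyre::Result<Option<DaemonRequest>> {
        match self.requests.recv_async().await {
            Ok(request) => Ok(Some(request)),
            // a closed channel behaves like a disconnected peer
            Err(flume::RecvError::Disconnected) => Ok(None),
        }
    }

    async fn send_reply(&mut self, message: DaemonReply) -> eyre::Result<()> {
        self.replies
            .send_async(message)
            .await
            .map_err(|_| eyre!("reply receiver dropped"))
    }
}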

+ 75
- 0
binaries/daemon/src/listener/shmem.rs

@@ -0,0 +1,75 @@
use super::Listener;
use crate::{shared_mem_handler, Event};
use dora_core::daemon_messages::{DaemonReply, DaemonRequest};
use eyre::eyre;
use shared_memory_server::ShmemServer;
use tokio::sync::{mpsc, oneshot};

#[tracing::instrument(skip(server, daemon_tx, shmem_handler_tx))]
pub async fn listener_loop(
mut server: ShmemServer<DaemonRequest, DaemonReply>,
daemon_tx: mpsc::Sender<Event>,
shmem_handler_tx: flume::Sender<shared_mem_handler::NodeEvent>,
) {
let (tx, rx) = flume::bounded(0);
tokio::task::spawn_blocking(move || {
while let Ok(operation) = rx.recv() {
match operation {
Operation::Receive(sender) => {
if sender.send(server.listen()).is_err() {
break;
}
}
Operation::Send {
message,
result_sender,
} => {
let result = server.send_reply(&message);
if result_sender.send(result).is_err() {
break;
}
}
}
}
});
let connection = ShmemConnection(tx);
Listener::run(connection, daemon_tx, shmem_handler_tx).await
}

enum Operation {
Receive(oneshot::Sender<eyre::Result<Option<DaemonRequest>>>),
Send {
message: DaemonReply,
result_sender: oneshot::Sender<eyre::Result<()>>,
},
}

struct ShmemConnection(flume::Sender<Operation>);

#[async_trait::async_trait]
impl super::Connection for ShmemConnection {
async fn receive_message(&mut self) -> eyre::Result<Option<DaemonRequest>> {
let (tx, rx) = oneshot::channel();
self.0
.send_async(Operation::Receive(tx))
.await
.map_err(|_| eyre!("failed send receive request to ShmemServer"))?;
rx.await
.map_err(|_| eyre!("failed to receive from ShmemServer"))
.and_then(|r| r)
}

async fn send_reply(&mut self, reply: DaemonReply) -> eyre::Result<()> {
let (tx, rx) = oneshot::channel();
self.0
.send_async(Operation::Send {
message: reply,
result_sender: tx,
})
.await
.map_err(|_| eyre!("failed send send request to ShmemServer"))?;
rx.await
.map_err(|_| eyre!("failed to receive from ShmemServer"))
.and_then(|r| r)
}
}
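
The listener above bridges the blocking `ShmemServer` into async code: a dedicated `spawn_blocking` task owns the server and serves `Operation` requests arriving over a rendezvous channel (`flume::bounded(0)`). A minimal standalone sketch of the same pattern, with a counter standing in for the blocking resource (all names illustrative):

use tokio::sync::oneshot;

enum Op {
    Get(oneshot::Sender<u64>),
}

async fn bridge_demo() {
    let (tx, rx) = flume::bounded::<Op>(0);
    tokio::task::spawn_blocking(move || {
        // this thread owns the blocking resource, like ShmemServer above
        let mut counter = 0u64;
        while let Ok(Op::Get(reply)) = rx.recv() {
            counter += 1;
            let _ = reply.send(counter);
        }
    });

    let (reply_tx, reply_rx) = oneshot::channel();
    tx.send_async(Op::Get(reply_tx)).await.unwrap();
    assert_eq!(reply_rx.await.unwrap(), 1);
}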

+ 85
- 0
binaries/daemon/src/listener/tcp.rs View File

@@ -0,0 +1,85 @@
use std::io::ErrorKind;

use super::Listener;
use crate::{
shared_mem_handler,
tcp_utils::{tcp_receive, tcp_send},
Event,
};
use dora_core::daemon_messages::{DaemonReply, DaemonRequest};
use eyre::Context;
use tokio::{
net::{TcpListener, TcpStream},
sync::mpsc,
};

#[tracing::instrument(skip(listener, daemon_tx, shmem_handler_tx))]
pub async fn listener_loop(
listener: TcpListener,
daemon_tx: mpsc::Sender<Event>,
shmem_handler_tx: flume::Sender<shared_mem_handler::NodeEvent>,
) {
loop {
match listener
.accept()
.await
.wrap_err("failed to accept new connection")
{
Err(err) => {
tracing::info!("{err}");
}
Ok((connection, _)) => {
tokio::spawn(handle_connection_loop(
connection,
daemon_tx.clone(),
shmem_handler_tx.clone(),
));
}
}
}
}

#[tracing::instrument(skip(connection, daemon_tx, shmem_handler_tx))]
async fn handle_connection_loop(
connection: TcpStream,
daemon_tx: mpsc::Sender<Event>,
shmem_handler_tx: flume::Sender<shared_mem_handler::NodeEvent>,
) {
if let Err(err) = connection.set_nodelay(true) {
tracing::warn!("failed to set nodelay for connection: {err}");
}

Listener::run(TcpConnection(connection), daemon_tx, shmem_handler_tx).await
}

struct TcpConnection(TcpStream);

#[async_trait::async_trait]
impl super::Connection for TcpConnection {
async fn receive_message(&mut self) -> eyre::Result<Option<DaemonRequest>> {
let raw = match tcp_receive(&mut self.0).await {
Ok(raw) => raw,
Err(err) => match err.kind() {
ErrorKind::UnexpectedEof
| ErrorKind::ConnectionAborted
| ErrorKind::ConnectionReset => return Ok(None),
_other => {
return Err(err)
.context("unexpected I/O error while trying to receive DaemonRequest")
}
},
};
bincode::deserialize(&raw)
.wrap_err("failed to deserialize DaemonRequest")
.map(Some)
}

async fn send_reply(&mut self, message: DaemonReply) -> eyre::Result<()> {
let serialized =
bincode::serialize(&message).wrap_err("failed to serialize DaemonReply")?;
tcp_send(&mut self.0, &serialized)
.await
.wrap_err("failed to send DaemonReply")?;
Ok(())
}
}
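
For reference, a hypothetical client-side counterpart under the same framing: connect, send one serialized request, read one reply. It assumes the `tcp_send`/`tcp_receive` helpers from `tcp_utils` are in scope; the address and request bytes are placeholders.

async fn request_reply(addr: &str, request: &[u8]) -> eyre::Result<Vec<u8>> {
    let mut stream = TcpStream::connect(addr).await?;
    stream.set_nodelay(true)?;
    tcp_send(&mut stream, request).await?;
    let reply = tcp_receive(&mut stream).await?;
    Ok(reply)
}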

+ 60
- 0
binaries/daemon/src/main.rs View File

@@ -0,0 +1,60 @@
use dora_core::topics::DORA_COORDINATOR_PORT_DEFAULT;
use dora_daemon::Daemon;
use eyre::Context;
use std::{net::Ipv4Addr, path::PathBuf};
use tracing::metadata::LevelFilter;
use tracing_subscriber::Layer;

#[derive(Debug, Clone, clap::Parser)]
#[clap(about = "Dora daemon")]
pub struct Args {
#[clap(long)]
pub run_dataflow: Option<PathBuf>,

#[clap(long)]
pub dora_runtime_path: Option<PathBuf>,
}

#[tokio::main]
async fn main() -> eyre::Result<()> {
// the tokio::main proc macro confuses some tools such as rust-analyzer, so
// directly invoke a "normal" async function
run().await
}

async fn run() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing subscriber")?;

let Args {
run_dataflow,
dora_runtime_path,
} = clap::Parser::parse();

match run_dataflow {
Some(dataflow_path) => {
tracing::info!("Starting dataflow `{}`", dataflow_path.display());

Daemon::run_dataflow(&dataflow_path, dora_runtime_path).await
}
None => {
tracing::info!("Starting in local mode");
let localhost = Ipv4Addr::new(127, 0, 0, 1);
let coordinator_socket = (localhost, DORA_COORDINATOR_PORT_DEFAULT);

let machine_id = String::new(); // TODO

Daemon::run(coordinator_socket.into(), machine_id, dora_runtime_path).await
}
}
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

+ 309
- 0
binaries/daemon/src/shared_mem_handler.rs View File

@@ -0,0 +1,309 @@
use core::fmt;
use std::{
collections::{HashMap, HashSet, VecDeque},
sync::Arc,
};

use dora_core::{
config::{DataId, NodeId},
daemon_messages::{DaemonReply, DataflowId, DropEvent, DropToken},
};
use eyre::{eyre, Context};
use flume::{Receiver, Sender};
use futures::StreamExt;
use futures_concurrency::stream::Merge;
use shared_memory_server::{Shmem, ShmemConf};
use tokio::sync::oneshot;
use uuid::Uuid;

use crate::MessageId;

pub struct SharedMemHandler {
events_tx: Sender<crate::ShmemHandlerEvent>,
prepared_messages: HashMap<String, PreparedMessage>,
sent_out_shared_memory: HashMap<DropToken, Arc<ShmemHandle>>,
dropped: HashSet<DropToken>,

cache: VecDeque<ShmemHandle>,
}

impl SharedMemHandler {
pub fn new(events_tx: Sender<crate::ShmemHandlerEvent>) -> Self {
Self {
events_tx,
prepared_messages: HashMap::new(),
sent_out_shared_memory: HashMap::new(),
dropped: HashSet::new(),
cache: VecDeque::new(),
}
}

pub async fn run(
&mut self,
node_events: Receiver<NodeEvent>,
daemon_events: Receiver<DaemonEvent>,
) {
if let Err(err) = self.run_inner(node_events, daemon_events).await {
if let Err(send_err) = self
.events_tx
.send_async(crate::ShmemHandlerEvent::HandlerError(err))
.await
{
tracing::error!("{send_err:?}");
}
}
}

pub async fn run_inner(
&mut self,
node_events: Receiver<NodeEvent>,
daemon_events: Receiver<DaemonEvent>,
) -> eyre::Result<()> {
let mut events = (
node_events.stream().map(Event::Node),
daemon_events.stream().map(Event::Daemon),
)
.merge();
while let Some(event) = events.next().await {
match event {
Event::Node(event) => self.handle_node_event(event).await?,
Event::Daemon(event) => self.handle_daemon_event(event).await?,
}
}
Ok(())
}

async fn handle_node_event(&mut self, event: NodeEvent) -> eyre::Result<()> {
match event {
NodeEvent::Drop(DropEvent { tokens }) => {
for token in tokens {
match self.sent_out_shared_memory.remove(&token) {
Some(arc) => {
if let Ok(shmem) = Arc::try_unwrap(arc) {
self.add_to_cache(shmem);
}
}
None => {
self.dropped.insert(token);
}
}
}
}
NodeEvent::PrepareOutputMessage {
dataflow_id,
node_id,
output_id,
metadata,
data_len,
reply_sender,
} => {
tracing::trace!(
"Time between construct and prepare: {:?}",
metadata
.timestamp()
.get_time()
.to_system_time()
.elapsed()
.unwrap()
);

let memory = if data_len > 0 {
let cache_index = self
.cache
.iter()
.enumerate()
.rev()
.filter(|(_, s)| s.size() >= data_len)
.min_by_key(|(_, s)| s.size())
.map(|(i, _)| i);
let memory = match cache_index {
Some(i) => {
// we know that this index exists, so we can safely unwrap here
self.cache.remove(i).unwrap()
}
None => ShmemHandle(Box::new(
ShmemConf::new()
.size(data_len)
.create()
.wrap_err("failed to allocate shared memory")?,
)),
};
assert!(memory.size() >= data_len);
Some(memory)
} else {
None
};
let id = memory
.as_ref()
.map(|m| m.0.get_os_id().to_owned())
.unwrap_or_else(|| Uuid::new_v4().to_string());
let message = PreparedMessage {
dataflow_id,
node_id,
output_id,
metadata,
data: memory.map(|m| (m, data_len)),
};
self.prepared_messages.insert(id.clone(), message);

let reply = DaemonReply::PreparedMessage {
shared_memory_id: id.clone(),
};
if reply_sender.send(reply).is_err() {
// free shared memory slice again
self.prepared_messages.remove(&id);
}
}
NodeEvent::SendPreparedMessage { id, reply_sender } => {
let message = self
.prepared_messages
.remove(&id)
.ok_or_else(|| eyre!("invalid shared memory id"))?;
let PreparedMessage {
dataflow_id,
node_id,
output_id,
metadata,
data,
} = message;
let data = data.map(|(m, len)| {
SharedMemSample {
shared_memory: m,
len,
}
.into()
});

let send_result = self
.events_tx
.send_async(crate::ShmemHandlerEvent::SendOut {
dataflow_id,
node_id,
output_id,
metadata,
data,
})
.await;
let _ = reply_sender.send(DaemonReply::Result(
send_result.map_err(|_| "daemon is no longer running".into()),
));
}
}
Ok(())
}

async fn handle_daemon_event(&mut self, event: DaemonEvent) -> eyre::Result<()> {
match event {
DaemonEvent::SentOut { data, drop_tokens } => {
// keep shared memory alive until we have received all drop tokens
let memory = Arc::new(data.shared_memory);
for drop_token in drop_tokens {
if self.dropped.remove(&drop_token) {
// this token was already dropped -> ignore
} else {
self.sent_out_shared_memory
.insert(drop_token, memory.clone());
}
}
if let Ok(memory) = Arc::try_unwrap(memory) {
self.add_to_cache(memory);
}
}
}
Ok(())
}

fn add_to_cache(&mut self, memory: ShmemHandle) {
const MAX_CACHE_SIZE: usize = 20;

self.cache.push_back(memory);
while self.cache.len() > MAX_CACHE_SIZE {
self.cache.pop_front();
}
}
}

pub struct SharedMemSample {
shared_memory: ShmemHandle,
len: usize,
}

impl SharedMemSample {
pub fn as_raw_slice(&self) -> *const [u8] {
std::ptr::slice_from_raw_parts(self.shared_memory.0.as_ptr(), self.len)
}

pub unsafe fn as_slice(&self) -> &[u8] {
unsafe { &*self.as_raw_slice() }
}

pub fn get_os_id(&self) -> &str {
self.shared_memory.0.get_os_id()
}

pub fn len(&self) -> usize {
self.len
}
}

#[derive(Debug)]
enum Event {
Node(NodeEvent),
Daemon(DaemonEvent),
}

#[derive(Debug)]
pub enum NodeEvent {
PrepareOutputMessage {
dataflow_id: DataflowId,
node_id: NodeId,
output_id: DataId,
metadata: dora_core::message::Metadata<'static>,
data_len: usize,
reply_sender: oneshot::Sender<DaemonReply>,
},
SendPreparedMessage {
id: MessageId,
reply_sender: oneshot::Sender<DaemonReply>,
},
Drop(DropEvent),
}

pub enum DaemonEvent {
SentOut {
data: SharedMemSample,
drop_tokens: Vec<DropToken>,
},
}
impl fmt::Debug for DaemonEvent {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::SentOut {
data: _,
drop_tokens,
} => f
.debug_struct("SentOut")
.field("data", &"[..]")
.field("drop_tokens", drop_tokens)
.finish(),
}
}
}

struct PreparedMessage {
dataflow_id: DataflowId,
node_id: NodeId,
output_id: DataId,
metadata: dora_core::message::Metadata<'static>,
data: Option<(ShmemHandle, usize)>,
}

struct ShmemHandle(Box<Shmem>);
impl ShmemHandle {
fn size(&self) -> usize {
self.0.len()
}
}

unsafe impl Send for ShmemHandle {}
unsafe impl Sync for ShmemHandle {}
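
The `PrepareOutputMessage` arm above does a best-fit lookup over the cache: among cached regions that are large enough, it picks the smallest, scanning from the back where the most recently freed entries live. A standalone sketch of just that selection, with plain sizes standing in for `ShmemHandle` entries:

use std::collections::VecDeque;

fn best_fit(cache: &VecDeque<usize>, data_len: usize) -> Option<usize> {
    cache
        .iter()
        .enumerate()
        .rev()
        .filter(|(_, size)| **size >= data_len)
        .min_by_key(|(_, size)| **size)
        .map(|(index, _)| index)
}

// e.g. best_fit(&VecDeque::from([4096, 512, 2048]), 1024) == Some(2)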

+ 163
- 0
binaries/daemon/src/spawn.rs View File

@@ -0,0 +1,163 @@
use crate::{
listener::spawn_listener_loop, runtime_node_inputs, runtime_node_outputs, shared_mem_handler,
DoraEvent, Event, NodeExitStatus,
};
use dora_core::{
config::NodeRunConfig,
daemon_messages::{DaemonCommunicationConfig, DataflowId, NodeConfig, RuntimeConfig},
descriptor::{resolve_path, source_is_url, OperatorSource, ResolvedNode},
};
use dora_download::download_file;
use eyre::WrapErr;
use std::{env::consts::EXE_EXTENSION, path::Path, process::Stdio};
use tokio::sync::mpsc;

const SHELL_SOURCE: &str = "shell";

pub async fn spawn_node(
dataflow_id: DataflowId,
working_dir: &Path,
node: ResolvedNode,
daemon_tx: mpsc::Sender<Event>,
shmem_handler_tx: flume::Sender<shared_mem_handler::NodeEvent>,
config: DaemonCommunicationConfig,
dora_runtime_path: Option<&Path>,
) -> eyre::Result<()> {
let node_id = node.id.clone();
tracing::debug!("Spawning node `{dataflow_id}/{node_id}`");

let daemon_communication = spawn_listener_loop(
&dataflow_id,
&node_id,
&daemon_tx,
&shmem_handler_tx,
config,
)
.await?;

let mut child = match node.kind {
dora_core::descriptor::CoreNodeKind::Custom(n) => {
let mut command = match n.source.as_str() {
SHELL_SOURCE => {
if cfg!(target_os = "windows") {
let mut cmd = tokio::process::Command::new("cmd");
cmd.args(["/C", &n.args.clone().unwrap_or_default()]);
cmd
} else {
let mut cmd = tokio::process::Command::new("sh");
cmd.args(["-c", &n.args.clone().unwrap_or_default()]);
cmd
}
}
source => {
let resolved_path = if source_is_url(source) {
// try to download the shared library
let target_path = Path::new("build")
.join(node_id.to_string())
.with_extension(EXE_EXTENSION);
download_file(source, &target_path)
.await
.wrap_err("failed to download custom node")?;
target_path.clone()
} else {
resolve_path(source, working_dir).wrap_err_with(|| {
format!("failed to resolve node source `{}`", source)
})?
};

tracing::info!("spawning {}", resolved_path.display());
let mut cmd = tokio::process::Command::new(&resolved_path);
if let Some(args) = &n.args {
cmd.args(args.split_ascii_whitespace());
}
cmd
}
};

command.current_dir(working_dir);
command.stdin(Stdio::null());
let node_config = NodeConfig {
dataflow_id,
node_id: node_id.clone(),
run_config: n.run_config.clone(),
daemon_communication,
};

command.env(
"DORA_NODE_CONFIG",
serde_yaml::to_string(&node_config).wrap_err("failed to serialize node config")?,
);
// Inject the env variables defined in the `yaml` into
// the node runtime.
if let Some(envs) = n.envs {
for (key, value) in envs {
command.env(key, value.to_string());
}
}
command.spawn().wrap_err_with(move || {
format!(
"failed to run `{}` with args `{}`",
n.source,
n.args.as_deref().unwrap_or_default()
)
})?
}
dora_core::descriptor::CoreNodeKind::Runtime(n) => {
let has_python_operator = n
.operators
.iter()
.any(|x| matches!(x.config.source, OperatorSource::Python { .. }));

let has_other_operator = n
.operators
.iter()
.any(|x| !matches!(x.config.source, OperatorSource::Python { .. }));

let mut command = if has_python_operator && !has_other_operator {
// Use python to spawn runtime if there is a python operator
let mut command = tokio::process::Command::new("python3");
command.args(["-c", "import dora; dora.start_runtime()"]);
command
} else if !has_python_operator && has_other_operator {
tokio::process::Command::new(
dora_runtime_path.unwrap_or_else(|| Path::new("dora-runtime")),
)
} else {
eyre::bail!("Runtime can not mix Python Operator with other type of operator.");
};
command.current_dir(working_dir);
command.stdin(Stdio::null());

let runtime_config = RuntimeConfig {
node: NodeConfig {
dataflow_id,
node_id: node_id.clone(),
run_config: NodeRunConfig {
inputs: runtime_node_inputs(&n),
outputs: runtime_node_outputs(&n),
},
daemon_communication,
},
operators: n.operators,
};
command.env(
"DORA_RUNTIME_CONFIG",
serde_yaml::to_string(&runtime_config)
.wrap_err("failed to serialize runtime config")?,
);

command.spawn().wrap_err("failed to run runtime")?
}
};

tokio::spawn(async move {
let exit_status = NodeExitStatus::from(child.wait().await);
let event = DoraEvent::SpawnedNodeResult {
dataflow_id,
node_id,
exit_status,
};
let _ = daemon_tx.send(event.into()).await;
});
Ok(())
}
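
The spawned process receives its configuration through the `DORA_NODE_CONFIG` environment variable set above. A minimal sketch of the reading side that a node would run on startup, assuming `NodeConfig` implements `Deserialize` (it is serialized with `serde_yaml` here):

fn read_node_config() -> eyre::Result<NodeConfig> {
    let raw = std::env::var("DORA_NODE_CONFIG")
        .wrap_err("env variable DORA_NODE_CONFIG must be set")?;
    serde_yaml::from_str(&raw).wrap_err("failed to deserialize node config")
}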

+ 23
- 0
binaries/daemon/src/tcp_utils.rs View File

@@ -0,0 +1,23 @@
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};

pub async fn tcp_send(
connection: &mut (impl AsyncWrite + Unpin),
message: &[u8],
) -> std::io::Result<()> {
let len_raw = (message.len() as u64).to_le_bytes();
connection.write_all(&len_raw).await?;
connection.write_all(message).await?;
connection.flush().await?;
Ok(())
}

pub async fn tcp_receive(connection: &mut (impl AsyncRead + Unpin)) -> std::io::Result<Vec<u8>> {
let reply_len = {
let mut raw = [0; 8];
connection.read_exact(&mut raw).await?;
u64::from_le_bytes(raw) as usize
};
let mut reply = vec![0; reply_len];
connection.read_exact(&mut reply).await?;
Ok(reply)
}
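
Both helpers implement a simple length-prefixed framing: a little-endian u64 length followed by the payload. A hedged round-trip check over an in-memory duplex pipe (assumes tokio's test macro is available):

#[tokio::test]
async fn framing_round_trip() -> std::io::Result<()> {
    let (mut a, mut b) = tokio::io::duplex(64);
    tcp_send(&mut a, b"hello").await?;
    let received = tcp_receive(&mut b).await?;
    assert_eq!(received, b"hello");
    Ok(())
}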

+ 4
- 12
binaries/runtime/Cargo.toml View File

@@ -7,11 +7,7 @@ license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
clap = { version = "3.1.12", features = ["derive"] }
dora-node-api = { path = "../../apis/rust/node", default-features = false, features = [
"zenoh",
"iceoryx",
] }
dora-node-api = { path = "../../apis/rust/node", default-features = false }
dora-operator-api-python = { path = "../../apis/python/operator" }
dora-operator-api-types = { path = "../../apis/rust/operator/types" }
dora-core = { workspace = true }
@@ -24,21 +20,17 @@ opentelemetry = { version = "0.17", features = [
opentelemetry-system-metrics = { version = "0.1.1", optional = true }
eyre = "0.6.8"
futures = "0.3.21"
futures-concurrency = "2.0.3"
futures-concurrency = "7.1.0"
libloading = "0.7.3"
serde_yaml = "0.8.23"
tokio = { version = "1.24.2", features = ["full"] }
tokio-stream = "0.1.8"
zenoh = { git = "https://github.com/eclipse-zenoh/zenoh.git", rev = "79a136e4fd90b11ff5d775ced981af53c4f1071b" }
zenoh-config = { git = "https://github.com/eclipse-zenoh/zenoh.git", rev = "79a136e4fd90b11ff5d775ced981af53c4f1071b" }
fern = "0.6.1"
pyo3 = { version = "0.16", features = ["eyre", "abi3-py37"] }
# The pyo3 abi3 flag allows simpler linking. See: https://pyo3.rs/v0.13.2/building_and_distribution.html
flume = "0.10.14"
dora-message = { path = "../../libraries/message" }
pyo3 = { version = "0.16", features = ["eyre", "abi3-py37"] }
tracing = "0.1.36"
tracing-subscriber = "0.3.15"
dora-download = { path = "../../libraries/extensions/download" }
flume = "0.10.14"

[features]
tracing = ["opentelemetry", "dora-tracing"]


+ 214
- 113
binaries/runtime/src/lib.rs View File

@@ -1,112 +1,113 @@
#![warn(unsafe_op_in_unsafe_fn)]

use dora_core::{
config::{CommunicationConfig, DataId, NodeId, OperatorId},
descriptor::OperatorDefinition,
};
use dora_node_api::{
self,
communication::{self, CommunicationLayer, Publisher, STOP_TOPIC},
manual_stop_publisher,
config::{DataId, OperatorId},
daemon_messages::RuntimeConfig,
descriptor::OperatorConfig,
};
use dora_node_api::DoraNode;
use eyre::{bail, Context, Result};
use futures::{Stream, StreamExt};
use operator::{spawn_operator, OperatorEvent, StopReason};
use futures_concurrency::stream::Merge;
use operator::{run_operator, OperatorEvent, StopReason};

use std::{
collections::{BTreeSet, HashMap},
mem,
};
use tokio::{runtime::Builder, sync::mpsc};
use tokio_stream::{wrappers::ReceiverStream, StreamMap};
use tokio_stream::wrappers::ReceiverStream;

mod operator;

pub fn main() -> eyre::Result<()> {
set_up_tracing().context("failed to set up tracing subscriber")?;

let node_id: NodeId = {
let raw =
std::env::var("DORA_NODE_ID").wrap_err("env variable DORA_NODE_ID must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize operator config")?
};
let communication_config: CommunicationConfig = {
let raw = std::env::var("DORA_COMMUNICATION_CONFIG")
.wrap_err("env variable DORA_COMMUNICATION_CONFIG must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize communication config")?
};
let operators: Vec<OperatorDefinition> = {
let raw =
std::env::var("DORA_OPERATORS").wrap_err("env variable DORA_OPERATORS must be set")?;
let config: RuntimeConfig = {
let raw = std::env::var("DORA_RUNTIME_CONFIG")
.wrap_err("env variable DORA_RUNTIME_CONFIG must be set")?;
serde_yaml::from_str(&raw).context("failed to deserialize operator config")?
};
let RuntimeConfig {
node: config,
operators,
} = config;
let node_id = config.node_id.clone();
let (node, daemon_events) = DoraNode::init(config)?;

let mut communication: Box<dyn CommunicationLayer> =
communication::init(&communication_config)?;

let mut operator_events = StreamMap::new();
let mut operator_stop_publishers = HashMap::new();
let mut operator_events_tx = HashMap::new();

for operator_config in &operators {
let (events_tx, events) = mpsc::channel(1);
let stop_publisher = publisher(
&node_id,
operator_config.id.clone(),
STOP_TOPIC.to_owned().into(),
communication.as_mut(),
)
.with_context(|| {
format!(
"failed to create stop publisher for operator {}",
operator_config.id
)
})?;
operator_stop_publishers.insert(operator_config.id.clone(), stop_publisher);

operator_events.insert(operator_config.id.clone(), ReceiverStream::new(events));
operator_events_tx.insert(operator_config.id.clone(), events_tx);
}
let operator_definition = if operators.is_empty() {
bail!("no operators");
} else if operators.len() > 1 {
bail!("multiple operators are not supported");
} else {
let mut ops = operators;
ops.remove(0)
};

let operator_events = operator_events.map(|(id, event)| Event::Operator { id, event });
let node_id_clone = node_id.clone();
let (operator_events_tx, events) = mpsc::channel(1);
let operator_id = operator_definition.id.clone();
let operator_events = ReceiverStream::new(events).map(move |event| Event::Operator {
id: operator_id.clone(),
event,
});
let daemon_events = Box::pin(futures::stream::unfold(daemon_events, |mut stream| async {
let event = stream.recv_async().await.map(|event| match event {
dora_node_api::Event::Stop => Event::Stop,
dora_node_api::Event::Input { id, metadata, data } => Event::Input {
id,
metadata,
data: data.map(|data| data.to_owned()),
},
dora_node_api::Event::InputClosed { id } => Event::InputClosed(id),
dora_node_api::Event::Error(err) => Event::Error(err),
_ => todo!(),
});
event.map(|event| (event, stream))
}));
let events = (operator_events, daemon_events).merge();
let tokio_runtime = Builder::new_current_thread()
.enable_all()
.build()
.wrap_err("Could not build a tokio runtime.")?;
let manual_stop_publisher = manual_stop_publisher(communication.as_mut())?;
let stop_thread = std::thread::spawn(move || -> Result<()> {
tokio_runtime.block_on(run(
node_id_clone,
operator_events,
operator_stop_publishers,
manual_stop_publisher,
))

let mut operator_channels = HashMap::new();
let (operator_channel, incoming_events) = operator::channel::channel(tokio_runtime.handle());
operator_channels.insert(operator_definition.id.clone(), operator_channel);

tracing::info!("spawning main task");
let operator_config = [(
operator_definition.id.clone(),
operator_definition.config.clone(),
)]
.into_iter()
.collect();
let main_task = std::thread::spawn(move || -> Result<()> {
tokio_runtime.block_on(run(node, operator_config, events, operator_channels))
});

for operator_config in &operators {
let events_tx = operator_events_tx.get(&operator_config.id).unwrap();
spawn_operator(
&node_id,
operator_config.clone(),
events_tx.clone(),
communication.as_mut(),
)
.wrap_err_with(|| format!("failed to init operator {}", operator_config.id))?;
let operator_id = operator_definition.id.clone();
run_operator(
&node_id,
operator_definition,
incoming_events,
operator_events_tx,
)
.wrap_err_with(|| format!("failed to run operator {operator_id}"))?;

match main_task.join() {
Ok(result) => result.wrap_err("main task failed")?,
Err(panic) => std::panic::resume_unwind(panic),
}

stop_thread
.join()
.map_err(|err| eyre::eyre!("Stop thread failed with err: {err:#?}"))?
.wrap_err("Stop loop thread failed unexpectedly.")?;
Ok(())
}

#[tracing::instrument(skip(node, events, operator_channels), fields(node.id))]
async fn run(
node_id: NodeId,
mut node: DoraNode,
operators: HashMap<OperatorId, OperatorConfig>,
mut events: impl Stream<Item = Event> + Unpin,
mut operator_stop_publishers: HashMap<OperatorId, Box<dyn Publisher>>,
manual_stop_publisher: Box<dyn Publisher>,
mut operator_channels: HashMap<OperatorId, flume::Sender<operator::IncomingEvent>>,
) -> eyre::Result<()> {
#[cfg(feature = "metrics")]
let _started = {
@@ -120,51 +121,151 @@ async fn run(
_started
};

let mut stopped_operators = BTreeSet::new();
let mut open_operator_inputs: HashMap<_, BTreeSet<_>> = operators
.iter()
.map(|(id, config)| (id, config.inputs.keys().collect()))
.collect();

while let Some(event) = events.next().await {
match event {
Event::Operator { id, event } => {
Event::Operator {
id: operator_id,
event,
} => {
match event {
OperatorEvent::Error(err) => {
bail!(err.wrap_err(format!("operator {id} failed")))
bail!(err.wrap_err(format!("operator {operator_id} failed")))
}
OperatorEvent::Panic(payload) => {
bail!("operator {operator_id} panicked: {payload:?}");
}
OperatorEvent::Panic(payload) => std::panic::resume_unwind(payload),
OperatorEvent::Finished { reason } => {
if let StopReason::ExplicitStopAll = reason {
let hlc = dora_message::uhlc::HLC::default();
let metadata = dora_message::Metadata::new(hlc.new_timestamp());
let hlc = dora_core::message::uhlc::HLC::default();
let metadata = dora_core::message::Metadata::new(hlc.new_timestamp());
let data = metadata
.serialize()
.wrap_err("failed to serialize stop message")?;
manual_stop_publisher
.publish(&data)
.map_err(|err| eyre::eyre!(err))
.wrap_err("failed to send stop message")?;
todo!("instruct dora-daemon/dora-coordinator to stop other nodes");
// manual_stop_publisher
// .publish(&data)
// .map_err(|err| eyre::eyre!(err))
// .wrap_err("failed to send stop message")?;
break;
}
if let Some(stop_publisher) = operator_stop_publishers.remove(&id) {
tracing::info!("operator {node_id}/{id} finished ({reason:?})");
stopped_operators.insert(id.clone());
// send stopped message
tokio::task::spawn_blocking(move || stop_publisher.publish(&[]))
.await
.wrap_err("failed to join stop publish task")?
.map_err(|err| eyre::eyre!(err))
.with_context(|| {
format!(
"failed to send stop message for operator `{node_id}/{id}`"
)
})?;
if operator_stop_publishers.is_empty() {
break;
}
} else {
tracing::warn!("no stop publisher for {id}");

let Some(config) = operators.get(&operator_id) else {
tracing::warn!("received Finished event for unknown operator `{operator_id}`");
continue;
};
let outputs = config
.outputs
.iter()
.map(|output_id| operator_output_id(&operator_id, output_id))
.collect();
let result;
(node, result) = tokio::task::spawn_blocking(move || {
let result = node.close_outputs(outputs);
(node, result)
})
.await
.wrap_err("failed to wait for close_outputs task")?;
result.wrap_err("failed to close outputs of finished operator")?;

operator_channels.remove(&operator_id);

if operator_channels.is_empty() {
break;
}
}
OperatorEvent::Output {
output_id,
metadata,
data,
} => {
let output_id = operator_output_id(&operator_id, &output_id);
let result;
(node, result) = tokio::task::spawn_blocking(move || {
let result = node.send_output(output_id, metadata, data.len(), |buf| {
buf.copy_from_slice(&data);
});
(node, result)
})
.await
.wrap_err("failed to wait for send_output task")?;
result.wrap_err("failed to send node output")?;
}
}
}
Event::Stop => {
// forward stop event to all operators and close the event channels
for (_, channel) in operator_channels.drain() {
let _ = channel.send_async(operator::IncomingEvent::Stop).await;
}
}
Event::Input { id, metadata, data } => {
let Some((operator_id, input_id)) = id.as_str().split_once('/') else {
tracing::warn!("received non-operator input {id}");
continue;
};
let operator_id = OperatorId::from(operator_id.to_owned());
let input_id = DataId::from(input_id.to_owned());
let Some(operator_channel) = operator_channels.get(&operator_id) else {
tracing::warn!("received input {id} for unknown operator");
continue;
};

if let Err(err) = operator_channel
.send_async(operator::IncomingEvent::Input {
input_id: input_id.clone(),
metadata,
data,
})
.await
.wrap_err_with(|| {
format!("failed to send input `{input_id}` to operator `{operator_id}`")
})
{
tracing::warn!("{err}");
}
}
Event::InputClosed(id) => {
let Some((operator_id, input_id)) = id.as_str().split_once('/') else {
tracing::warn!("received InputClosed event for non-operator input {id}");
continue;
};
let operator_id = OperatorId::from(operator_id.to_owned());
let input_id = DataId::from(input_id.to_owned());

let Some(operator_channel) = operator_channels.get(&operator_id) else {
tracing::warn!("received input {id} for unknown operator");
continue;
};
if let Err(err) = operator_channel
.send_async(operator::IncomingEvent::InputClosed {
input_id: input_id.clone(),
})
.await
.wrap_err_with(|| {
format!(
"failed to send InputClosed({input_id}) to operator `{operator_id}`"
)
})
{
tracing::warn!("{err}");
}

if let Some(open_inputs) = open_operator_inputs.get_mut(&operator_id) {
open_inputs.remove(&input_id);
if open_inputs.is_empty() {
// all inputs of the node were closed -> close its event channel
tracing::info!("all inputs of operator {}/{operator_id} were closed -> closing event channel", node.id());
open_operator_inputs.remove(&operator_id);
operator_channels.remove(&operator_id);
}
}
}
Event::Error(err) => eyre::bail!("received error event: {err}"),
}
}

@@ -173,24 +274,24 @@ async fn run(
Ok(())
}

fn publisher(
self_id: &NodeId,
operator_id: OperatorId,
output_id: DataId,
communication: &mut dyn CommunicationLayer,
) -> eyre::Result<Box<dyn Publisher>> {
let topic = format!("{self_id}/{operator_id}/{output_id}");
communication
.publisher(&topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to create publisher for output {output_id}"))
fn operator_output_id(operator_id: &OperatorId, output_id: &DataId) -> DataId {
DataId::from(format!("{operator_id}/{output_id}"))
}

#[derive(Debug)]
enum Event {
Operator {
id: OperatorId,
event: OperatorEvent,
},
Stop,
Input {
id: dora_core::config::DataId,
metadata: dora_core::message::Metadata<'static>,
data: Option<Vec<u8>>,
},
InputClosed(dora_core::config::DataId),
Error(String),
}

fn set_up_tracing() -> eyre::Result<()> {


+ 127
- 0
binaries/runtime/src/operator/channel.rs View File

@@ -0,0 +1,127 @@
use super::IncomingEvent;
use futures::{
future::{self, FusedFuture},
FutureExt,
};
use std::collections::VecDeque;

pub fn channel(
runtime: &tokio::runtime::Handle,
) -> (flume::Sender<IncomingEvent>, flume::Receiver<IncomingEvent>) {
let (incoming_tx, incoming_rx) = flume::bounded(10);
let (outgoing_tx, outgoing_rx) = flume::bounded(0);

runtime.spawn(async {
let mut buffer = InputBuffer::new();
buffer.run(incoming_rx, outgoing_tx).await;
});

(incoming_tx, outgoing_rx)
}

struct InputBuffer {
queue: VecDeque<IncomingEvent>,
max_queue_len: usize,
}

impl InputBuffer {
pub fn new() -> Self {
Self {
queue: VecDeque::new(),
max_queue_len: 10,
}
}

pub async fn run(
&mut self,
incoming: flume::Receiver<IncomingEvent>,
outgoing: flume::Sender<IncomingEvent>,
) {
let mut send_out_buf = future::Fuse::terminated();
let mut incoming_closed = false;
loop {
let next_incoming = if incoming_closed {
future::Fuse::terminated()
} else {
incoming.recv_async().fuse()
};
match future::select(next_incoming, send_out_buf).await {
future::Either::Left((event, mut send_out)) => {
match event {
Ok(event) => {
// received a new event -> push it to the queue
self.add_event(event);

// if outgoing queue is empty, fill it again
if send_out.is_terminated() {
send_out = self.send_next_queued(&outgoing);
}
}
Err(flume::RecvError::Disconnected) => {
incoming_closed = true;
}
}

// reassign the send_out future, which might be still in progress
send_out_buf = send_out;
}
future::Either::Right((send_result, _)) => match send_result {
Ok(()) => {
send_out_buf = self.send_next_queued(&outgoing);
}
Err(flume::SendError(_)) => break,
},
};
if incoming_closed && send_out_buf.is_terminated() && self.queue.is_empty() {
break;
}
}
}

fn send_next_queued<'a>(
&mut self,
outgoing: &'a flume::Sender<IncomingEvent>,
) -> future::Fuse<flume::r#async::SendFut<'a, IncomingEvent>> {
if let Some(next) = self.queue.pop_front() {
outgoing.send_async(next).fuse()
} else {
future::Fuse::terminated()
}
}

fn add_event(&mut self, event: IncomingEvent) {
self.queue.push_back(event);

// drop oldest input events to maintain the max queue length
let input_event_count = self
.queue
.iter()
.filter(|e| matches!(e, IncomingEvent::Input { .. }))
.count();
let drop_n = input_event_count.saturating_sub(self.max_queue_len);
if drop_n > 0 {
self.drop_oldest_inputs(drop_n);
}
}

fn drop_oldest_inputs(&mut self, number: usize) {
tracing::debug!("dropping {number} operator inputs because event queue is too full");
for i in 0..number {
// find index of oldest input event
let index = self
.queue
.iter()
.position(|e| matches!(e, IncomingEvent::Input { .. }))
.unwrap_or_else(|| panic!("no input event found in drop iteration {i}"));

// remove that event
self.queue.remove(index);
}
}
}

impl Default for InputBuffer {
fn default() -> Self {
Self::new()
}
}
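
The buffer above only counts `Input` events against `max_queue_len` and evicts the oldest `Input` first, so control events like `Stop` or `InputClosed` are never dropped. A standalone sketch of that policy with a toy event type:

use std::collections::VecDeque;

#[derive(Debug, PartialEq)]
enum Ev {
    Input(u32),
    Stop,
}

fn enforce_limit(queue: &mut VecDeque<Ev>, max_inputs: usize) {
    let inputs = queue.iter().filter(|e| matches!(e, Ev::Input(_))).count();
    for _ in 0..inputs.saturating_sub(max_inputs) {
        if let Some(i) = queue.iter().position(|e| matches!(e, Ev::Input(_))) {
            // evict the oldest input; Stop events stay queued
            queue.remove(i);
        }
    }
}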

+ 77
- 37
binaries/runtime/src/operator/mod.rs View File

@@ -1,52 +1,32 @@
use dora_core::{
config::NodeId,
config::{DataId, NodeId},
descriptor::{OperatorDefinition, OperatorSource},
message::{Metadata, MetadataParameters},
};
use dora_node_api::communication::{self, CommunicationLayer};
use dora_operator_api_python::metadata_to_pydict;
use eyre::Context;
#[cfg(feature = "tracing")]
use opentelemetry::sdk::trace::Tracer;
use pyo3::{
types::{PyBytes, PyDict},
IntoPy, PyObject, Python,
};
use std::any::Any;
use tokio::sync::mpsc::Sender;

#[cfg(not(feature = "tracing"))]
type Tracer = ();

pub mod channel;
mod python;
mod shared_lib;

#[tracing::instrument(skip(communication))]
pub fn spawn_operator(
pub fn run_operator(
node_id: &NodeId,
operator_definition: OperatorDefinition,
incoming_events: flume::Receiver<IncomingEvent>,
events_tx: Sender<OperatorEvent>,
communication: &mut dyn CommunicationLayer,
) -> eyre::Result<()> {
let inputs = communication::subscribe_all(communication, &operator_definition.config.inputs)
.wrap_err_with(|| {
format!(
"failed to subscribe to inputs of operator {}",
operator_definition.id
)
})?;

let publishers = operator_definition
.config
.outputs
.iter()
.map(|output_id| {
let topic = format!(
"{node_id}/{operator_id}/{output_id}",
operator_id = operator_definition.id
);
communication
.publisher(&topic)
.map_err(|err| eyre::eyre!(err))
.wrap_err_with(|| format!("failed to create publisher for output {output_id}"))
.map(|p| (output_id.to_owned(), p))
})
.collect::<Result<_, _>>()?;

#[cfg(feature = "tracing")]
let tracer =
dora_tracing::init_tracing(format!("{node_id}/{}", operator_definition.id).as_str())
@@ -57,13 +37,12 @@ pub fn spawn_operator(

match &operator_definition.config.source {
OperatorSource::SharedLibrary(source) => {
shared_lib::spawn(
shared_lib::run(
node_id,
&operator_definition.id,
source,
events_tx,
inputs,
publishers,
incoming_events,
tracer,
)
.wrap_err_with(|| {
@@ -74,13 +53,12 @@ pub fn spawn_operator(
})?;
}
OperatorSource::Python(source) => {
python::spawn(
python::run(
node_id,
&operator_definition.id,
source,
events_tx,
inputs,
publishers,
incoming_events,
tracer,
)
.wrap_err_with(|| {
@@ -97,10 +75,72 @@ pub fn spawn_operator(
Ok(())
}

#[derive(Debug)]
pub enum OperatorEvent {
Output {
output_id: DataId,
metadata: MetadataParameters<'static>,
data: Vec<u8>,
},
Error(eyre::Error),
Panic(Box<dyn Any + Send>),
Finished { reason: StopReason },
Finished {
reason: StopReason,
},
}

#[derive(Debug)]
pub enum IncomingEvent {
Stop,
Input {
input_id: DataId,
metadata: Metadata<'static>,
data: Option<Vec<u8>>,
},
InputClosed {
input_id: DataId,
},
}

impl IntoPy<PyObject> for IncomingEvent {
fn into_py(self, py: Python) -> PyObject {
let dict = PyDict::new(py);

let ty = match self {
Self::Stop => "STOP",
Self::Input {
input_id,
metadata,
data,
} => {
dict.set_item("id", input_id.to_string())
.wrap_err("failed to add input ID")
.unwrap();
dict.set_item(
"data",
PyBytes::new(py, data.as_deref().unwrap_or_default()),
)
.wrap_err("failed to add input data")
.unwrap();
dict.set_item("metadata", metadata_to_pydict(&metadata, py))
.wrap_err("failed to add input metadata")
.unwrap();
"INPUT"
}
Self::InputClosed { input_id } => {
dict.set_item("id", input_id.to_string())
.wrap_err("failed to add input ID")
.unwrap();
"INPUT_CLOSED"
}
};

dict.set_item("type", ty)
.wrap_err("could not make type a python dictionary item")
.unwrap();

dict.into()
}
}

#[derive(Debug)]


+ 64
- 85
binaries/runtime/src/operator/python.rs View File

@@ -1,50 +1,37 @@
#![allow(clippy::borrow_deref_ref)] // clippy warns about code generated by #[pymethods]

use super::{OperatorEvent, StopReason, Tracer};
use super::{IncomingEvent, OperatorEvent, StopReason, Tracer};
use dora_core::{
config::{DataId, NodeId, OperatorId},
config::{NodeId, OperatorId},
descriptor::source_is_url,
};
use dora_download::download_file;
use dora_message::uhlc;
use dora_node_api::communication::Publisher;
use dora_operator_api_python::metadata_to_pydict;
use dora_operator_api_types::DoraStatus;
use eyre::{bail, eyre, Context, Result};
use pyo3::{
pyclass,
types::IntoPyDict,
types::{PyBytes, PyDict},
Py, Python,
};
use pyo3::{pyclass, types::IntoPyDict, IntoPy, Py, Python};
use std::{
borrow::Cow,
collections::HashMap,
panic::{catch_unwind, AssertUnwindSafe},
path::Path,
sync::Arc,
};
use tokio::sync::mpsc::Sender;

fn traceback(err: pyo3::PyErr) -> eyre::Report {
Python::with_gil(|py| {
eyre::Report::msg(format!(
"{}\n{err}",
err.traceback(py)
.expect("PyError should have a traceback")
.format()
.expect("Traceback could not be formatted")
))
})
let traceback = Python::with_gil(|py| err.traceback(py).and_then(|t| t.format().ok()));
if let Some(traceback) = traceback {
eyre::eyre!("{err}:\n{traceback}")
} else {
eyre::eyre!("{err}")
}
}

pub fn spawn(
#[tracing::instrument(skip(events_tx, incoming_events, tracer))]
pub fn run(
node_id: &NodeId,
operator_id: &OperatorId,
source: &str,
events_tx: Sender<OperatorEvent>,
inputs: flume::Receiver<dora_node_api::Input>,
publishers: HashMap<DataId, Box<dyn Publisher>>,
incoming_events: flume::Receiver<IncomingEvent>,
tracer: Tracer,
) -> eyre::Result<()> {
let path = if source_is_url(source) {
@@ -71,8 +58,7 @@ pub fn spawn(
let path_cloned = path.clone();

let send_output = SendOutputCallback {
publishers: Arc::new(publishers),
hlc: Arc::new(uhlc::HLC::default()),
events_tx: events_tx.clone(),
};

let init_operator = move |py: Python| {
@@ -114,29 +100,34 @@ pub fn spawn(
Python::with_gil(init_operator).wrap_err("failed to init python operator")?;

let reason = loop {
let Ok(mut input) = inputs.recv() else {break StopReason::InputsClosed };

#[cfg(feature = "tracing")]
let (_child_cx, string_cx) = {
use dora_tracing::{deserialize_context, serialize_context};
use opentelemetry::trace::TraceContextExt;
use opentelemetry::{trace::Tracer, Context as OtelContext};

let cx = deserialize_context(&input.metadata.parameters.open_telemetry_context);
let span = tracer.start_with_context(format!("{}", input.id), &cx);

let child_cx = OtelContext::current_with_span(span);
let string_cx = serialize_context(&child_cx);
(child_cx, string_cx)
};

#[cfg(not(feature = "tracing"))]
let string_cx = {
let () = tracer;
"".to_string()
};
input.metadata.parameters.open_telemetry_context = Cow::Owned(string_cx);

let Ok(mut event) = incoming_events.recv() else { break StopReason::InputsClosed };

if let IncomingEvent::Input {
input_id, metadata, ..
} = &mut event
{
#[cfg(feature = "tracing")]
let (_child_cx, string_cx) = {
use dora_tracing::{deserialize_context, serialize_context};
use opentelemetry::trace::TraceContextExt;
use opentelemetry::{trace::Tracer, Context as OtelContext};

let cx = deserialize_context(&metadata.parameters.open_telemetry_context);
let span = tracer.start_with_context(format!("{}", input_id), &cx);

let child_cx = OtelContext::current_with_span(span);
let string_cx = serialize_context(&child_cx);
(child_cx, string_cx)
};

#[cfg(not(feature = "tracing"))]
let string_cx = {
let _ = input_id;
let () = tracer;
"".to_string()
};
metadata.parameters.open_telemetry_context = Cow::Owned(string_cx);
}
let status = Python::with_gil(|py| -> Result<i32> {
// We need to create a new scoped `GILPool` because the dora-runtime
// is currently started through a `start_runtime` wrapper function,
@@ -149,29 +140,21 @@ pub fn spawn(
// https://github.com/PyO3/pyo3/issues/2853 for more details.
let pool = unsafe { py.new_pool() };
let py = pool.python();
let input_dict = PyDict::new(py);
let bytes = PyBytes::new(py, &input.data());

input_dict.set_item("id", input.id.as_str())?;
input_dict.set_item("data", bytes)?;
input_dict.set_item("metadata", metadata_to_pydict(input.metadata(), py))?;
let input_dict = event.into_py(py);

let status_enum = operator
.call_method1(py, "on_input", (input_dict, send_output.clone()))
.call_method1(py, "on_event", (input_dict, send_output.clone()))
.map_err(traceback)?;

let status_val = status_enum
.getattr(py, "value")
.wrap_err("on_input must have enum return value")?;
status_val
.extract(py)
.wrap_err("on_input has invalid return value")
let status_val = Python::with_gil(|py| status_enum.getattr(py, "value"))
.wrap_err("on_event must have enum return value")?;
Python::with_gil(|py| status_val.extract(py))
.wrap_err("on_event has invalid return value")
})?;
match status {
s if s == DoraStatus::Continue as i32 => {} // ok
s if s == DoraStatus::Stop as i32 => break StopReason::ExplicitStop,
s if s == DoraStatus::StopAll as i32 => break StopReason::ExplicitStopAll,
other => bail!("on_input returned invalid status {other}"),
other => bail!("on_event returned invalid status {other}"),
}
};

@@ -207,15 +190,15 @@ pub fn spawn(
#[pyclass]
#[derive(Clone)]
struct SendOutputCallback {
publishers: Arc<HashMap<DataId, Box<dyn Publisher>>>,
hlc: Arc<uhlc::HLC>,
events_tx: Sender<OperatorEvent>,
}

#[allow(unsafe_op_in_unsafe_fn)]
mod callback_impl {

use crate::operator::OperatorEvent;

use super::SendOutputCallback;
use dora_message::Metadata;
use dora_operator_api_python::pydict_to_metadata;
use eyre::{eyre, Context, Result};
use pyo3::{
@@ -232,25 +215,21 @@ mod callback_impl {
metadata: Option<&PyDict>,
) -> Result<()> {
let data = data.as_bytes();
let parameters = pydict_to_metadata(metadata).wrap_err("Could not parse metadata.")?;
let metadata = Metadata::from_parameters(self.hlc.new_timestamp(), parameters);
let mut message = metadata
.serialize()
.context(format!("failed to serialize `{}` metadata", output))?;
let metadata = pydict_to_metadata(metadata)
.wrap_err("Could not parse metadata.")?
.into_owned();

let event = OperatorEvent::Output {
output_id: output.to_owned().into(),
metadata,
data: data.to_owned(),
};

match self.publishers.get(output) {
Some(publisher) => {
message.extend_from_slice(data);
self.events_tx
.blocking_send(event)
.map_err(|_| eyre!("failed to send output to runtime"))?;

publisher
.publish(&message)
.map_err(|err| eyre::eyre!(err))
.context("publish failed")
}
None => Err(eyre!(
"unexpected output {output} (not defined in dataflow config)"
)),
}
Ok(())
}
}
}

+ 113
- 94
binaries/runtime/src/operator/shared_lib.rs View File

@@ -1,37 +1,32 @@
use super::{OperatorEvent, StopReason, Tracer};
use super::{IncomingEvent, OperatorEvent, StopReason, Tracer};
use dora_core::{
adjust_shared_library_path,
config::{DataId, NodeId, OperatorId},
descriptor::source_is_url,
};
use dora_download::download_file;
use dora_message::uhlc;
use dora_node_api::communication::Publisher;
use dora_node_api::MetadataParameters;
use dora_operator_api_types::{
safer_ffi::closure::ArcDynFn1, DoraDropOperator, DoraInitOperator, DoraInitResult, DoraOnInput,
DoraResult, DoraStatus, Metadata, OnInputResult, Output, SendOutput,
safer_ffi::closure::ArcDynFn1, DoraDropOperator, DoraInitOperator, DoraInitResult, DoraOnEvent,
DoraResult, DoraStatus, Metadata, OnEventResult, Output, SendOutput,
};
use eyre::{bail, eyre, Context};
use flume::Receiver;
use libloading::Symbol;
use std::{
collections::HashMap,
borrow::Cow,
ffi::c_void,
ops::Deref,
panic::{catch_unwind, AssertUnwindSafe},
path::Path,
sync::Arc,
thread,
};
use tokio::sync::mpsc::Sender;

pub fn spawn(
pub fn run(
node_id: &NodeId,
operator_id: &OperatorId,
source: &str,
events_tx: Sender<OperatorEvent>,
inputs: Receiver<dora_node_api::Input>,
publishers: HashMap<DataId, Box<dyn Publisher>>,
incoming_events: flume::Receiver<IncomingEvent>,
tracer: Tracer,
) -> eyre::Result<()> {
let path = if source_is_url(source) {
@@ -55,49 +50,42 @@ pub fn spawn(
libloading::Library::new(&path)
.wrap_err_with(|| format!("failed to load shared library at `{}`", path.display()))?
};
let hlc = uhlc::HLC::default();

thread::spawn(move || {
let closure = AssertUnwindSafe(|| {
let bindings = Bindings::init(&library).context("failed to init operator")?;
let closure = AssertUnwindSafe(|| {
let bindings = Bindings::init(&library).context("failed to init operator")?;

let operator = SharedLibraryOperator {
inputs,
bindings,
hlc,
};
let operator = SharedLibraryOperator {
incoming_events,
bindings,
events_tx: events_tx.clone(),
};

operator.run(publishers, tracer)
});
match catch_unwind(closure) {
Ok(Ok(reason)) => {
let _ = events_tx.blocking_send(OperatorEvent::Finished { reason });
}
Ok(Err(err)) => {
let _ = events_tx.blocking_send(OperatorEvent::Error(err));
}
Err(panic) => {
let _ = events_tx.blocking_send(OperatorEvent::Panic(panic));
}
}
operator.run(tracer)
});
match catch_unwind(closure) {
Ok(Ok(reason)) => {
let _ = events_tx.blocking_send(OperatorEvent::Finished { reason });
}
Ok(Err(err)) => {
let _ = events_tx.blocking_send(OperatorEvent::Error(err));
}
Err(panic) => {
let _ = events_tx.blocking_send(OperatorEvent::Panic(panic));
}
}

Ok(())
}

struct SharedLibraryOperator<'lib> {
inputs: Receiver<dora_node_api::Input>,
incoming_events: flume::Receiver<IncomingEvent>,
events_tx: Sender<OperatorEvent>,

bindings: Bindings<'lib>,
hlc: uhlc::HLC,
}

impl<'lib> SharedLibraryOperator<'lib> {
fn run(
self,
publishers: HashMap<DataId, Box<dyn Publisher>>,
tracer: Tracer,
) -> eyre::Result<StopReason> {
fn run(self, tracer: Tracer) -> eyre::Result<StopReason> {
let operator_context = {
let DoraInitResult {
result,
@@ -115,32 +103,27 @@ impl<'lib> SharedLibraryOperator<'lib> {

let send_output_closure = Arc::new(move |output: Output| {
let Output {
id,
id: output_id,
data,
metadata: Metadata {
open_telemetry_context,
},
} = output;
let mut metadata = dora_node_api::Metadata::new(self.hlc.new_timestamp());
metadata.parameters.open_telemetry_context =
String::from(open_telemetry_context).into();

let message = metadata
.serialize()
.context(format!("failed to serialize `{}` metadata", id.deref()))
.map_err(|err| err.into());

let result = message.and_then(|mut message| match publishers.get(id.deref()) {
Some(publisher) => {
message.extend_from_slice(&data); // TODO avoid copy
publisher.publish(&message)
}
None => Err(eyre!(
"unexpected output {} (not defined in dataflow config)",
id.deref()
)
.into()),
});
let metadata = MetadataParameters {
open_telemetry_context: Cow::Owned(open_telemetry_context.into()),
..Default::default()
};

let event = OperatorEvent::Output {
output_id: DataId::from(String::from(output_id)),
metadata,
data: data.to_owned(),
};

let result = self
.events_tx
.blocking_send(event)
.map_err(|_| eyre!("failed to send output to runtime"));

let error = match result {
Ok(()) => None,
@@ -151,47 +134,83 @@ impl<'lib> SharedLibraryOperator<'lib> {
});

let reason = loop {
let Ok(input) = self.inputs.recv() else {
let Ok(mut event) = self.incoming_events.recv() else {
break StopReason::InputsClosed
};
#[cfg(feature = "tracing")]
let (_child_cx, string_cx) = {
use dora_tracing::{deserialize_context, serialize_context};
use opentelemetry::{
trace::{TraceContextExt, Tracer},
Context as OtelContext,

if let IncomingEvent::Input {
input_id, metadata, ..
} = &mut event
{
#[cfg(feature = "tracing")]
let (_child_cx, string_cx) = {
use dora_tracing::{deserialize_context, serialize_context};
use opentelemetry::{
trace::{TraceContextExt, Tracer},
Context as OtelContext,
};

let span = tracer.start_with_context(
format!("{}", input.id),
&deserialize_context(&input.metadata.parameters.open_telemetry_context),
);
let child_cx = OtelContext::current_with_span(span);
let string_cx = serialize_context(&child_cx);
(child_cx, string_cx)
};
#[cfg(not(feature = "tracing"))]
let string_cx = {
let () = tracer;
let _ = input_id;
"".to_string()
};
metadata.parameters.open_telemetry_context = Cow::Owned(string_cx);
}

let span = tracer.start_with_context(
format!("{}", input.id),
&deserialize_context(&input.metadata.parameters.open_telemetry_context),
);
let child_cx = OtelContext::current_with_span(span);
let string_cx = serialize_context(&child_cx);
(child_cx, string_cx)
};
#[cfg(not(feature = "tracing"))]
let string_cx = {
let () = tracer;
"".to_string()
};
let operator_input = dora_operator_api_types::Input {
data: input.data().into_owned().into(),
id: String::from(input.id).into(),
metadata: Metadata {
open_telemetry_context: string_cx.into(),
let operator_event = match event {
IncomingEvent::Stop => dora_operator_api_types::RawEvent {
input: None,
input_closed: None,
stop: true,
},
IncomingEvent::Input {
input_id,
metadata,
data,
} => {
let operator_input = dora_operator_api_types::Input {
id: String::from(input_id).into(),
data: data.unwrap_or_default().into(),
metadata: Metadata {
open_telemetry_context: metadata
.parameters
.open_telemetry_context
.into_owned()
.into(),
},
};
dora_operator_api_types::RawEvent {
input: Some(Box::new(operator_input).into()),
input_closed: None,
stop: false,
}
}
IncomingEvent::InputClosed { input_id } => dora_operator_api_types::RawEvent {
input_closed: Some(input_id.to_string().into()),
input: None,
stop: false,
},
};

let send_output = SendOutput {
send_output: ArcDynFn1::new(send_output_closure.clone()),
};
let OnInputResult {
let OnEventResult {
result: DoraResult { error },
status,
} = unsafe {
(self.bindings.on_input.on_input)(
&operator_input,
(self.bindings.on_event.on_event)(
&operator_event,
&send_output,
operator_context.raw,
)
@@ -223,7 +242,7 @@ impl<'lib> Drop for OperatorContext<'lib> {
struct Bindings<'lib> {
init_operator: Symbol<'lib, DoraInitOperator>,
drop_operator: Symbol<'lib, DoraDropOperator>,
on_input: Symbol<'lib, DoraOnInput>,
on_event: Symbol<'lib, DoraOnEvent>,
}

impl<'lib> Bindings<'lib> {
@@ -236,9 +255,9 @@ impl<'lib> Bindings<'lib> {
drop_operator: library
.get(b"dora_drop_operator")
.wrap_err("failed to get `dora_drop_operator`")?,
on_input: library
.get(b"dora_on_input")
.wrap_err("failed to get `dora_on_input`")?,
on_event: library
.get(b"dora_on_event")
.wrap_err("failed to get `dora_on_event`")?,
}
};
Ok(bindings)
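
A minimal sketch of the symbol-resolution pattern used by `Bindings::init`: load the shared library and look up one exported function with libloading. The `fn()` signature shown is a placeholder; real operators export the `DoraOnEvent` ABI.

fn probe_on_event(path: &std::path::Path) -> eyre::Result<()> {
    let library = unsafe { libloading::Library::new(path)? };
    let _on_event: libloading::Symbol<unsafe extern "C" fn()> =
        unsafe { library.get(b"dora_on_event")? };
    Ok(())
}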


+ 19
- 0
examples/benchmark/dataflow.yml View File

@@ -0,0 +1,19 @@
communication:
zenoh:
prefix: /benchmark-example

nodes:
- id: rust-node
custom:
build: cargo build -p benchmark-example-node --release
source: ../../target/release/benchmark-example-node
outputs:
- latency
- throughput
- id: rust-sink
custom:
build: cargo build -p benchmark-example-sink --release
source: ../../target/release/benchmark-example-sink
inputs:
latency: rust-node/latency
throughput: rust-node/throughput

examples/iceoryx/sink/Cargo.toml → examples/benchmark/node/Cargo.toml View File

@@ -1,12 +1,15 @@
[package]
name = "iceoryx-example-sink"
name = "benchmark-example-node"
version.workspace = true
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dora-node-api = { workspace = true, features = ["iceoryx"] }
dora-node-api = { workspace = true }
eyre = "0.6.8"
futures = "0.3.21"
tokio = { version = "1.24.2", features = ["macros"] }
rand = "0.8.5"
tokio = { version = "1.20.1", features = ["rt", "macros"] }
tracing = "0.1.36"
tracing-subscriber = "0.3.15"

+ 72
- 0
examples/benchmark/node/src/main.rs View File

@@ -0,0 +1,72 @@
use dora_node_api::{self, dora_core::config::DataId, DoraNode};
use eyre::Context;
use rand::Rng;
use std::time::Duration;
use tracing_subscriber::Layer;

fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing subscriber")?;

let latency = DataId::from("latency".to_owned());
let throughput = DataId::from("throughput".to_owned());

let (mut node, _events) = DoraNode::init_from_env()?;
let sizes = [
0,
8,
64,
512,
2048,
4096,
4 * 4096,
10 * 4096,
100 * 4096,
1000 * 4096,
10000 * 4096,
];

// test latency first
for size in sizes {
for _ in 0..100 {
let data: Vec<u8> = rand::thread_rng()
.sample_iter(rand::distributions::Standard)
.take(size)
.collect();
node.send_output(latency.clone(), Default::default(), data.len(), |out| {
out.copy_from_slice(&data);
})?;

// sleep a bit to avoid queue buildup
std::thread::sleep(Duration::from_millis(10));
}
}

// wait a bit to ensure that all latency messages reached their target
std::thread::sleep(Duration::from_secs(2));

// then throughput with full speed
for size in sizes {
for _ in 0..100 {
let data: Vec<u8> = rand::thread_rng()
.sample_iter(rand::distributions::Standard)
.take(size)
.collect();
node.send_output(throughput.clone(), Default::default(), data.len(), |out| {
out.copy_from_slice(&data);
})?;
}
}

Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(tracing::metadata::LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

+ 43
- 0
examples/benchmark/run.rs View File

@@ -0,0 +1,43 @@
use eyre::{bail, Context};
use std::path::Path;
use tracing::metadata::LevelFilter;
use tracing_subscriber::Layer;

#[tokio::main]
async fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing subscriber")?;

let root = Path::new(env!("CARGO_MANIFEST_DIR"));
std::env::set_current_dir(root.join(file!()).parent().unwrap())
.wrap_err("failed to set working dir")?;

let dataflow = Path::new("dataflow.yml");
build_dataflow(dataflow).await?;

dora_daemon::Daemon::run_dataflow(dataflow, None).await?;

Ok(())
}

async fn build_dataflow(dataflow: &Path) -> eyre::Result<()> {
let cargo = std::env::var("CARGO").unwrap();
let mut cmd = tokio::process::Command::new(&cargo);
cmd.arg("run");
cmd.arg("--package").arg("dora-cli");
cmd.arg("--").arg("build").arg(dataflow);
if !cmd.status().await?.success() {
bail!("failed to build dataflow");
};
Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

examples/iceoryx/node/Cargo.toml → examples/benchmark/sink/Cargo.toml View File

@@ -1,11 +1,12 @@
[package]
name = "iceoryx-example-node"
name = "benchmark-example-sink"
version.workspace = true
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dora-node-api = { workspace = true, features = ["iceoryx"] }
dora-node-api = { workspace = true }
eyre = "0.6.8"
rand = "0.8.5"
tracing = "0.1.36"
tracing-subscriber = "0.3.15"

+ 98
- 0
examples/benchmark/sink/src/main.rs View File

@@ -0,0 +1,98 @@
use dora_node_api::{self, DoraNode, Event};
use eyre::Context;
use std::time::{Duration, Instant};
use tracing_subscriber::Layer;

fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing subscriber")?;

let (_node, mut events) = DoraNode::init_from_env()?;

// latency is tested first
let mut latency = true;

let mut current_size = 0;
let mut n = 0;
let mut start = Instant::now();
let mut latencies = Vec::new();

println!("Latency:");

while let Some(event) = events.recv() {
match event {
Event::Input { id, metadata, data } => {
// check if new size bracket
let data_len = data.map(|d| d.len()).unwrap_or_default();
if data_len != current_size {
if n > 0 {
record_results(start, current_size, n, latencies, latency);
}
current_size = data_len;
n = 0;
start = Instant::now();
latencies = Vec::new();
}

match id.as_str() {
"latency" if latency => {}
"throughput" if latency => {
latency = false;
println!("Throughput:");
}
"throughput" => {}
other => {
eprintln!("Ignoring unexpected input `{other}`");
continue;
}
}

n += 1;
latencies.push(
metadata
.timestamp()
.get_time()
.to_system_time()
.elapsed()
.unwrap_or_default(),
);
}
Event::InputClosed { id } => {
println!("Input `{id}` was closed");
}
other => eprintln!("Received unexpected input: {other:?}"),
}
}

record_results(start, current_size, n, latencies, latency);

Ok(())
}

fn record_results(
start: Instant,
current_size: usize,
n: u32,
latencies: Vec<Duration>,
latency: bool,
) {
let msg = if latency {
let avg_latency = latencies.iter().sum::<Duration>() / n;
format!("size {current_size:<#8x}: {avg_latency:?}")
} else {
let duration = start.elapsed();
let msg_per_sec = n as f64 / duration.as_secs_f64();
format!("size {current_size:<#8x}: {msg_per_sec:.0} messages per second")
};
println!("{msg}");
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(tracing::metadata::LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}
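Note that `record_results` divides the summed latencies by `n`, and the final call after the loop runs unconditionally; if the sink exits before receiving any input, `n == 0` and the `Duration` division panics. A minimal sketch (not from the commit) of the same bookkeeping with an empty-bracket guard:

```rust
use std::time::{Duration, Instant};

// Same computation as `record_results`, but an empty size bracket yields
// `None` instead of panicking on `Duration / 0`.
fn summarize(start: Instant, n: u32, latencies: &[Duration], latency_mode: bool) -> Option<String> {
    if n == 0 {
        return None;
    }
    Some(if latency_mode {
        let avg = latencies.iter().sum::<Duration>() / n;
        format!("average latency: {avg:?}")
    } else {
        let msg_per_sec = n as f64 / start.elapsed().as_secs_f64();
        format!("throughput: {msg_per_sec:.0} messages per second")
    })
}
```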

+ 4
- 2
examples/c++-dataflow/dataflow.yml

@@ -18,7 +18,7 @@ nodes:
outputs:
- counter

- id: runtime-node
- id: runtime-node-1
operators:
- id: operator-rust-api
shared-library: build/operator_rust_api
@@ -27,9 +27,11 @@ nodes:
counter_2: cxx-node-rust-api/counter
outputs:
- status
- id: runtime-node-2
operators:
- id: operator-c-api
shared-library: build/operator_c_api
inputs:
op_status: runtime-node/operator-rust-api/status
op_status: runtime-node-1/operator-rust-api/status
outputs:
- half-status
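The runtime node is split into `runtime-node-1` and `runtime-node-2`, and the second operator's input now addresses the first operator's output across nodes with three `/`-separated segments: node id, operator id, output id, as in `runtime-node-1/operator-rust-api/status`. A hypothetical helper (purely illustrative, not part of the codebase) that splits such a reference:

```rust
// Split a `node/operator/output` reference into its three segments.
fn split_operator_output(reference: &str) -> Option<(&str, &str, &str)> {
    let mut parts = reference.splitn(3, '/');
    Some((parts.next()?, parts.next()?, parts.next()?))
}

fn main() {
    assert_eq!(
        split_operator_output("runtime-node-1/operator-rust-api/status"),
        Some(("runtime-node-1", "operator-rust-api", "status"))
    );
}
```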

+ 46
- 32
examples/c++-dataflow/node-c-api/main.cc

@@ -12,49 +12,61 @@ int run(void *dora_context)

for (int i = 0; i < 20; i++)
{

auto input = dora_next_input(dora_context);
if (input == NULL)
void *event = dora_next_event(dora_context);
if (event == NULL)
{
return 0; // end of input
printf("[c node] ERROR: unexpected end of event\n");
return -1;
}
counter += 1;

char *id_ptr;
size_t id_len;
read_dora_input_id(input, &id_ptr, &id_len);
std::string id(id_ptr, id_len);
enum DoraEventType ty = read_dora_event_type(event);

char *data_ptr;
size_t data_len;
read_dora_input_data(input, &data_ptr, &data_len);
std::vector<unsigned char> data;
for (size_t i = 0; i < data_len; i++)
if (ty == DoraEventType_Input)
{
data.push_back(*(data_ptr + i));
}

std::cout
<< "Received input "
<< " (counter: " << (unsigned int)counter << ") data: [";
for (unsigned char &v : data)
{
std::cout << (unsigned int)v << ", ";
}
std::cout << "]" << std::endl;
counter += 1;

free_dora_input(input);
char *id_ptr;
size_t id_len;
read_dora_input_id(event, &id_ptr, &id_len);
std::string id(id_ptr, id_len);

std::vector<unsigned char> out_vec{counter};
char *data_ptr;
size_t data_len;
read_dora_input_data(event, &data_ptr, &data_len);
std::vector<unsigned char> data;
for (size_t i = 0; i < data_len; i++)
{
data.push_back(*(data_ptr + i));
}

std::string out_id = "counter";
std::cout
<< "Received input "
<< " (counter: " << (unsigned int)counter << ") data: [";
for (unsigned char &v : data)
{
std::cout << (unsigned int)v << ", ";
}
std::cout << "]" << std::endl;

int result = dora_send_output(dora_context, &out_id[0], out_id.length(), (char *)&counter, 1);
if (result != 0)
std::vector<unsigned char> out_vec{counter};
std::string out_id = "counter";
int result = dora_send_output(dora_context, &out_id[0], out_id.length(), (char *)&counter, 1);
if (result != 0)
{
std::cerr << "failed to send output" << std::endl;
return 1;
}
}
else if (ty == DoraEventType_Stop)
{
printf("[c node] received stop event\n");
}
else
{
std::cerr << "failed to send output" << std::endl;
return 1;
printf("[c node] received unexpected event: %d\n", ty);
}

free_dora_event(event);
}
return 0;
}
@@ -67,5 +79,7 @@ int main()
auto ret = run(dora_context);
free_dora_context(dora_context);

std::cout << "GOODBYE FROM C++ node (using C API)" << std::endl;

return ret;
}

+ 24
- 11
examples/c++-dataflow/node-rust-api/main.cc

@@ -13,25 +13,38 @@ int main()
for (int i = 0; i < 20; i++)
{

auto input = next_input(dora_node.inputs);
if (input.end_of_input)
auto event = next_event(dora_node.events);
auto ty = event_type(event);

if (ty == DoraEventType::AllInputsClosed)
{
break;
}
counter += 1;
else if (ty == DoraEventType::Input)
{
auto input = event_as_input(std::move(event));

counter += 1;

std::cout << "Received input " << std::string(input.id) << " (counter: " << (unsigned int)counter << ")" << std::endl;
std::cout << "Received input " << std::string(input.id) << " (counter: " << (unsigned int)counter << ")" << std::endl;

std::vector<unsigned char> out_vec{counter};
rust::Slice<const uint8_t> out_slice{out_vec.data(), out_vec.size()};
auto result = send_output(dora_node.send_output, "counter", out_slice);
auto error = std::string(result.error);
if (!error.empty())
std::vector<unsigned char> out_vec{counter};
rust::Slice<const uint8_t> out_slice{out_vec.data(), out_vec.size()};
auto result = send_output(dora_node.send_output, "counter", out_slice);
auto error = std::string(result.error);
if (!error.empty())
{
std::cerr << "Error: " << error << std::endl;
return -1;
}
}
else
{
std::cerr << "Error: " << error << std::endl;
return -1;
std::cerr << "Unknown event type " << static_cast<int>(ty) << std::endl;
}
}

std::cout << "GOODBYE FROM C++ node (using Rust API)" << std::endl;

return 0;
}

+ 39
- 28
examples/c++-dataflow/operator-c-api/operator.cc

@@ -30,44 +30,55 @@ extern "C" DoraResult_t dora_drop_operator(void *operator_context)
return {};
}

extern "C" OnInputResult_t dora_on_input(
const Input_t *input,
extern "C" OnEventResult_t dora_on_event(
const RawEvent_t *event,
const SendOutput_t *send_output,
void *operator_context)
{
if (event->input != NULL)
{
// input event
Input_t *input = event->input;
std::string id((char *)input->id.ptr, input->id.len);

std::string id((char *)input->id.ptr, input->id.len);
std::vector<unsigned char> data;
for (size_t i = 0; i < input->data.len; i++)
{
data.push_back(*(input->data.ptr + i));
}

std::vector<unsigned char> data;
for (size_t i = 0; i < input->data.len; i++)
{
data.push_back(*(input->data.ptr + i));
}
std::cout
<< "C++ Operator (C-API) received input `" << id << "` with data: [";
for (unsigned char &v : data)
{
std::cout << (unsigned int)v << ", ";
}
std::cout << "]" << std::endl;

std::cout
<< "C++ Operator (C-API) received input `" << id << "` with data: [";
for (unsigned char &v : data)
{
std::cout << (unsigned int)v << ", ";
}
std::cout << "]" << std::endl;
const char *out_id = "half-status";
char *out_id_heap = strdup(out_id);

const char *out_id = "half-status";
char *out_id_heap = strdup(out_id);
size_t out_data_len = 1;
uint8_t *out_data_heap = (uint8_t *)malloc(out_data_len);
*out_data_heap = data[0] / 2;

size_t out_data_len = 1;
uint8_t *out_data_heap = (uint8_t *)malloc(out_data_len);
*out_data_heap = data[0] / 2;
Output_t output = {.id = {
.ptr = (uint8_t *)out_id_heap,
.len = strlen(out_id_heap),
.cap = strlen(out_id_heap) + 1,
},
.data = {.ptr = out_data_heap, .len = out_data_len, .cap = out_data_len}};

Output_t output = {.id = {
.ptr = (uint8_t *)out_id_heap,
.len = strlen(out_id_heap),
.cap = strlen(out_id_heap) + 1,
},
.data = {.ptr = out_data_heap, .len = out_data_len, .cap = out_data_len}};
DoraResult_t send_result = (send_output->send_output.call)(send_output->send_output.env_ptr, output);

DoraResult_t send_result = (send_output->send_output.call)(send_output->send_output.env_ptr, output);
OnEventResult_t result = {.result = send_result, .status = DORA_STATUS_CONTINUE};
return result;
}
if (event->stop)
{
printf("C operator received stop event\n");
}

OnInputResult_t result = {.result = send_result, .status = DORA_STATUS_CONTINUE};
OnEventResult_t result = {.status = DORA_STATUS_CONTINUE};
return result;
}
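In the new C operator API, `RawEvent_t` acts as a tagged union expressed through nullable fields: `event->input` is non-NULL for input events, and `event->stop` marks stop events. A purely illustrative Rust analogue of that dispatch (field and variant names are assumptions, not the actual binding):

```rust
// Illustrative model of the C-side `RawEvent_t` dispatch.
pub struct RawEvent<'a> {
    pub input: Option<Input<'a>>, // corresponds to a non-NULL `event->input`
    pub stop: bool,               // corresponds to `event->stop`
}

pub struct Input<'a> {
    pub id: &'a [u8],
    pub data: &'a [u8],
}

pub enum Event<'a> {
    Input(Input<'a>),
    Stop,
    Unknown,
}

impl<'a> RawEvent<'a> {
    // Collapse the flag-style struct into a proper enum; an input event
    // takes precedence, mirroring the `if`/`else if` chain in the C code.
    pub fn classify(self) -> Event<'a> {
        match (self.input, self.stop) {
            (Some(input), _) => Event::Input(input),
            (None, true) => Event::Stop,
            (None, false) => Event::Unknown,
        }
    }
}
```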

+ 38
- 24
examples/c++-dataflow/run.rs

@@ -4,11 +4,17 @@ use std::{
ffi::{OsStr, OsString},
path::Path,
};
use tracing::metadata::LevelFilter;
use tracing_subscriber::Layer;

#[tokio::main]
async fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing")?;

if cfg!(windows) {
eprintln!("The c++ example does not work on Windows currently because of a linker error");
tracing::error!(
"The c++ example does not work on Windows currently because of a linker error"
);
return Ok(());
}

@@ -20,22 +26,6 @@ async fn main() -> eyre::Result<()> {
tokio::fs::create_dir_all("build").await?;
let build_dir = Path::new("build");

build_package("dora-operator-api-cxx").await?;
let operator_cxxbridge = target
.join("cxxbridge")
.join("dora-operator-api-cxx")
.join("src");
tokio::fs::copy(
operator_cxxbridge.join("lib.rs.cc"),
build_dir.join("operator-bridge.cc"),
)
.await?;
tokio::fs::copy(
operator_cxxbridge.join("lib.rs.h"),
build_dir.join("dora-operator-api.h"),
)
.await?;

build_package("dora-node-api-cxx").await?;
let node_cxxbridge = target
.join("cxxbridge")
@@ -57,6 +47,22 @@ async fn main() -> eyre::Result<()> {
)
.await?;

build_package("dora-operator-api-cxx").await?;
let operator_cxxbridge = target
.join("cxxbridge")
.join("dora-operator-api-cxx")
.join("src");
tokio::fs::copy(
operator_cxxbridge.join("lib.rs.cc"),
build_dir.join("operator-bridge.cc"),
)
.await?;
tokio::fs::copy(
operator_cxxbridge.join("lib.rs.h"),
build_dir.join("dora-operator-api.h"),
)
.await?;

build_package("dora-node-api-c").await?;
build_package("dora-operator-api-c").await?;
build_cxx_node(
@@ -88,7 +94,7 @@ async fn main() -> eyre::Result<()> {
"-l",
"dora_operator_api_cxx",
"-L",
&root.join("target").join("debug").to_str().unwrap(),
root.join("target").join("debug").to_str().unwrap(),
],
)
.await?;
@@ -101,13 +107,10 @@ async fn main() -> eyre::Result<()> {
)
.await?;

let dataflow = Path::new("dataflow.yml").to_owned();
build_package("dora-runtime").await?;

dora_coordinator::run(dora_coordinator::Args {
run_dataflow: Path::new("dataflow.yml").to_owned().into(),
runtime: Some(root.join("target").join("debug").join("dora-runtime")),
})
.await?;
let dora_runtime_path = Some(root.join("target").join("debug").join("dora-runtime"));
dora_daemon::Daemon::run_dataflow(&dataflow, dora_runtime_path).await?;

Ok(())
}
@@ -267,3 +270,14 @@ async fn build_cxx_operator(

Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}
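The example runners now share one launch pattern: build `dora-runtime` only when the dataflow contains runtime operators, then run the dataflow in-process through the daemon. A condensed sketch, assuming the `Daemon::run_dataflow(&Path, Option<PathBuf>)` signature used above (`needs_runtime` is a hypothetical flag):

```rust
use std::path::{Path, PathBuf};

// Condensed launch pattern: hand the dataflow to the daemon in-process,
// passing a runtime path only when runtime operators are involved.
async fn launch(root: &Path, dataflow: &Path, needs_runtime: bool) -> eyre::Result<()> {
    let runtime: Option<PathBuf> =
        needs_runtime.then(|| root.join("target").join("debug").join("dora-runtime"));
    dora_daemon::Daemon::run_dataflow(dataflow, runtime).await?;
    Ok(())
}
```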

+ 3
- 3
examples/c-dataflow/dataflow.yml

@@ -7,15 +7,15 @@ nodes:
custom:
source: build/c_node
inputs:
timer: dora/timer/secs/1
timer: dora/timer/millis/50
outputs:
- tick
- message
- id: runtime-node
operators:
- id: c_operator
shared-library: build/operator
inputs:
tick: c_node/tick
message: c_node/message
outputs:
- counter
- id: c_sink


+ 28
- 13
examples/c-dataflow/node.c

@@ -23,29 +23,44 @@ int main()

printf("[c node] dora context initialized\n");

for (char i = 0; i < 10; i++)
for (char i = 0; i < 100; i++)
{
printf("[c node] waiting for next input\n");
void *input = dora_next_input(dora_context);
if (input == NULL)
void *event = dora_next_event(dora_context);
if (event == NULL)
{
printf("[c node] ERROR: unexpected end of input\n");
printf("[c node] ERROR: unexpected end of event\n");
return -1;
}

char *data;
size_t data_len;
read_dora_input_data(input, &data, &data_len);
enum DoraEventType ty = read_dora_event_type(event);

assert(data_len == 0);
if (ty == DoraEventType_Input)
{
char *data;
size_t data_len;
read_dora_input_data(event, &data, &data_len);

assert(data_len == 0);

char out_id[] = "tick";
dora_send_output(dora_context, out_id, strlen(out_id), &i, 1);
char out_id[] = "message";
char out_data[50];
int out_data_len = sprintf(out_data, "loop iteration %d", i);

dora_send_output(dora_context, out_id, strlen(out_id), out_data, out_data_len);
}
else if (ty == DoraEventType_Stop)
{
printf("[c node] received stop event\n");
}
else
{
printf("[c node] received unexpected event: %d\n", ty);
}

free_dora_input(input);
free_dora_event(event);
}

printf("[c node] received 10 inputs\n");
printf("[c node] received 10 events\n");

free_dora_context(dora_context);



+ 42
- 31
examples/c-dataflow/operator.c

@@ -22,49 +22,60 @@ DoraResult_t dora_drop_operator(void *operator_context)
return result;
}

OnInputResult_t dora_on_input(
const Input_t *input,
OnEventResult_t dora_on_event(
const RawEvent_t *event,
const SendOutput_t *send_output,
void *operator_context)
{
char *counter = (char *)operator_context;

char id[input->id.len + 1];
memcpy(id, input->id.ptr, input->id.len);
id[input->id.len] = 0;

if (strcmp(id, "tick") == 0)
if (event->input != NULL)
{
char data[input->data.len + 1];
memcpy(data, input->data.ptr, input->data.len);
data[input->data.len] = 0;
// input event
Input_t *input = event->input;

char id[input->id.len + 1];
memcpy(id, input->id.ptr, input->id.len);
id[input->id.len] = 0;

if (strcmp(id, "message") == 0)
{
char data[input->data.len + 1];
memcpy(data, input->data.ptr, input->data.len);
data[input->data.len] = 0;

*counter += 1;
printf("C operator received tick input with data `%s`, counter: %i\n", data, *counter);
*counter += 1;
printf("C operator received message `%s`, counter: %i\n", data, *counter);

char *out_id = "counter";
char *out_id_heap = strdup(out_id);
char *out_id = "counter";
char *out_id_heap = strdup(out_id);

int data_alloc_size = 100;
char *out_data = (char *)malloc(data_alloc_size);
int count = snprintf(out_data, data_alloc_size, "The current counter value is %d", *counter);
assert(count >= 0 && count < 100);
int data_alloc_size = 100;
char *out_data = (char *)malloc(data_alloc_size);
int count = snprintf(out_data, data_alloc_size, "The current counter value is %d", *counter);
assert(count >= 0 && count < 100);

Output_t output = {.id = {
.ptr = (uint8_t *)out_id_heap,
.len = strlen(out_id_heap),
.cap = strlen(out_id_heap) + 1,
},
.data = {.ptr = (uint8_t *)out_data, .len = strlen(out_data), .cap = data_alloc_size}};
DoraResult_t res = (send_output->send_output.call)(send_output->send_output.env_ptr, output);
Output_t output = {.id = {
.ptr = (uint8_t *)out_id_heap,
.len = strlen(out_id_heap),
.cap = strlen(out_id_heap) + 1,
},
.data = {.ptr = (uint8_t *)out_data, .len = strlen(out_data), .cap = data_alloc_size}};
DoraResult_t res = (send_output->send_output.call)(send_output->send_output.env_ptr, output);

OnInputResult_t result = {.result = res, .status = DORA_STATUS_CONTINUE};
return result;
OnEventResult_t result = {.result = res, .status = DORA_STATUS_CONTINUE};
return result;
}
else
{
printf("C operator received unexpected input %s, context: %i\n", id, *counter);
}
}
else
if (event->stop)
{
printf("C operator received unexpected input %s, context: %i\n", id, *counter);
OnInputResult_t result = {.status = DORA_STATUS_CONTINUE};
return result;
printf("C operator received stop event\n");
}

OnEventResult_t result = {.status = DORA_STATUS_CONTINUE};
return result;
}

+ 21
- 7
examples/c-dataflow/run.rs

@@ -4,27 +4,30 @@ use std::{
ffi::{OsStr, OsString},
path::Path,
};
use tracing::metadata::LevelFilter;
use tracing_subscriber::Layer;

#[tokio::main]
async fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing")?;

let root = Path::new(env!("CARGO_MANIFEST_DIR"));
std::env::set_current_dir(root.join(file!()).parent().unwrap())
.wrap_err("failed to set working dir")?;

tokio::fs::create_dir_all("build").await?;

build_package("dora-runtime").await?;
build_package("dora-node-api-c").await?;
build_package("dora-operator-api-c").await?;
build_c_node(root, "node.c", "c_node").await?;
build_c_node(root, "sink.c", "c_sink").await?;

build_package("dora-operator-api-c").await?;
build_c_operator().await?;

dora_coordinator::run(dora_coordinator::Args {
run_dataflow: Path::new("dataflow.yml").to_owned().into(),
runtime: Some(root.join("target").join("debug").join("dora-runtime")),
})
.await?;
let dataflow = Path::new("dataflow.yml").to_owned();
build_package("dora-runtime").await?;
let dora_runtime_path = Some(root.join("target").join("debug").join("dora-runtime"));
dora_daemon::Daemon::run_dataflow(&dataflow, dora_runtime_path).await?;

Ok(())
}
@@ -124,3 +127,14 @@ async fn build_c_operator() -> eyre::Result<()> {

Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

+ 30
- 16
examples/c-dataflow/sink.c

@@ -18,29 +18,43 @@ int main()

while (1)
{
printf("[c sink] waiting for next input\n");
void *input = dora_next_input(dora_context);
if (input == NULL)
void *event = dora_next_event(dora_context);
if (event == NULL)
{
printf("[c sink] end of input\n");
printf("[c sink] end of event\n");
break;
}

char *id;
size_t id_len;
read_dora_input_id(input, &id, &id_len);
enum DoraEventType ty = read_dora_event_type(event);

char *data;
size_t data_len;
read_dora_input_data(input, &data, &data_len);
if (ty == DoraEventType_Input)
{
char *id;
size_t id_len;
read_dora_input_id(event, &id, &id_len);

char *data;
size_t data_len;
read_dora_input_data(event, &data, &data_len);

printf("sink received input `");
fwrite(id, id_len, 1, stdout);
printf("` with data: '");
fwrite(data, data_len, 1, stdout);
printf("'\n");
printf("[c sink] received input `");
fwrite(id, id_len, 1, stdout);
printf("` with data: %s\n", data);
}
else if (ty == DoraEventType_InputClosed)
{
printf("[c sink] received InputClosed event\n");
}
else if (ty == DoraEventType_Stop)
{
printf("[c sink] received stop event\n");
}
else
{
printf("[c sink] received unexpected event: %d\n", ty);
}

free_dora_input(input);
free_dora_event(event);
}

free_dora_context(dora_context);


+ 0
- 26
examples/iceoryx/dataflow.yml

@@ -1,26 +0,0 @@
communication:
iceoryx:
app_name_prefix: dora-iceoryx-example

nodes:
- id: rust-node
custom:
source: ../../target/debug/iceoryx-example-node
inputs:
tick: dora/timer/millis/300
outputs:
- random
- id: runtime-node
operators:
- id: rust-operator
shared-library: ../../target/debug/iceoryx_example_operator
inputs:
tick: dora/timer/millis/100
random: rust-node/random
outputs:
- status
- id: rust-sink
custom:
source: ../../target/debug/iceoryx-example-sink
inputs:
message: runtime-node/rust-operator/status

+ 0
- 34
examples/iceoryx/node/src/main.rs

@@ -1,34 +0,0 @@
use dora_node_api::{self, dora_core::config::DataId, DoraNode};

fn main() -> eyre::Result<()> {
let output = DataId::from("random".to_owned());

let mut operator = DoraNode::init_from_env()?;

let inputs = operator.inputs()?;

for _ in 0..20 {
let input = match inputs.recv() {
Ok(input) => input,
Err(_) => break,
};

match input.id.as_str() {
"tick" => {
let random: u64 = rand::random();
let data: &[u8] = &random.to_le_bytes();
operator.send_output(
&output,
input.metadata().parameters.clone(),
data.len(),
|out| {
out.copy_from_slice(data);
},
)?;
}
other => eprintln!("Ignoring unexpected input `{other}`"),
}
}

Ok(())
}

+ 0
- 13
examples/iceoryx/operator/Cargo.toml

@@ -1,13 +0,0 @@
[package]
name = "iceoryx-example-operator"
version.workspace = true
edition = "2021"
license = "Apache-2.0"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
crate-type = ["cdylib"]

[dependencies]
dora-operator-api = { path = "../../../apis/rust/operator" }

+ 0
- 47
examples/iceoryx/operator/src/lib.rs

@@ -1,47 +0,0 @@
#![warn(unsafe_op_in_unsafe_fn)]

use dora_operator_api::{register_operator, DoraOperator, DoraOutputSender, DoraStatus};
use std::time::{Duration, Instant};

register_operator!(ExampleOperator);

#[derive(Debug, Default)]
struct ExampleOperator {
ticks: usize,
last_random_at: Option<Instant>,
}

impl DoraOperator for ExampleOperator {
fn on_input(
&mut self,
id: &str,
data: &[u8],
output_sender: &mut DoraOutputSender,
) -> Result<DoraStatus, String> {
match id {
"tick" => {
self.ticks += 1;
}
"random" => {
let parsed = {
let data: [u8; 8] = data.try_into().map_err(|_| "unexpected random data")?;
u64::from_le_bytes(data)
};
let output = format!(
"operator received random value {parsed} after {} ticks",
self.ticks
);
output_sender.send("status".into(), output.into_bytes())?;
self.last_random_at = Some(Instant::now());
}
other => eprintln!("ignoring unexpected input {other}"),
}
if let Some(last_random_at) = self.last_random_at {
if last_random_at.elapsed() > Duration::from_secs(1) {
// looks like the node sending the random values finished -> exit too
return Ok(DoraStatus::Stop);
}
}
Ok(DoraStatus::Continue)
}
}

+ 0
- 33
examples/iceoryx/run.rs

@@ -1,33 +0,0 @@
use eyre::{bail, Context};
use std::path::Path;

#[tokio::main]
async fn main() -> eyre::Result<()> {
let root = Path::new(env!("CARGO_MANIFEST_DIR"));
std::env::set_current_dir(root.join(file!()).parent().unwrap())
.wrap_err("failed to set working dir")?;

build_package("iceoryx-example-node").await?;
build_package("iceoryx-example-operator").await?;
build_package("iceoryx-example-sink").await?;
build_package("dora-runtime").await?;

dora_coordinator::run(dora_coordinator::Args {
run_dataflow: Path::new("dataflow.yml").to_owned().into(),
runtime: Some(root.join("target").join("debug").join("dora-runtime")),
})
.await?;

Ok(())
}

async fn build_package(package: &str) -> eyre::Result<()> {
let cargo = std::env::var("CARGO").unwrap();
let mut cmd = tokio::process::Command::new(&cargo);
cmd.arg("build");
cmd.arg("--package").arg(package);
if !cmd.status().await?.success() {
bail!("failed to build {package}");
};
Ok(())
}

+ 0
- 28
examples/iceoryx/sink/src/main.rs

@@ -1,28 +0,0 @@
use dora_node_api::{self, DoraNode};
use eyre::{bail, Context};

fn main() -> eyre::Result<()> {
let mut operator = DoraNode::init_from_env()?;

let inputs = operator.inputs()?;

while let Ok(input) = inputs.recv() {
match input.id.as_str() {
"message" => {
let data = input.data();
let received_string =
std::str::from_utf8(&data).wrap_err("received message was not utf8-encoded")?;
println!("received message: {}", received_string);
if !received_string.starts_with("operator received random value ") {
bail!("unexpected message format (should start with 'operator received random value')")
}
if !received_string.ends_with(" ticks") {
bail!("unexpected message format (should end with 'ticks')")
}
}
other => eprintln!("Ignoring unexpected input `{other}`"),
}
}

Ok(())
}

+ 4
- 4
examples/python-dataflow/dataflow.yml

@@ -12,16 +12,16 @@ nodes:
- image

- id: object_detection
operator:
python: object_detection.py
custom:
source: ./object_detection.py
inputs:
image: webcam/image
outputs:
- bbox

- id: plot
operator:
python: plot.py
custom:
source: ./plot.py
inputs:
image: webcam/image
bbox: object_detection/bbox

+ 4
- 4
examples/python-dataflow/dataflow_without_webcam.yml

@@ -12,16 +12,16 @@ nodes:
- image

- id: object_detection
operator:
python: object_detection.py
custom:
source: ./object_detection.py
inputs:
image: no_webcam/image
outputs:
- bbox

- id: plot
operator:
python: plot.py
custom:
source: ./plot.py
inputs:
image: no_webcam/image
bbox: object_detection/bbox

+ 10
- 5
examples/python-dataflow/no_webcam.py

@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import time
@@ -17,7 +17,12 @@ start = time.time()

while time.time() - start < 20:
# Wait for the next event
node.next()
node.send_output("image", arr.tobytes())

time.sleep(1)
event = node.next()
match event["type"]:
case "INPUT":
print("received input", event["id"])
node.send_output("image", arr.tobytes())
case "STOP":
print("received stop")
case other:
print("received unexpected event:", other)

+ 27
- 30
examples/python-dataflow/object_detection.py

@@ -1,36 +1,33 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from typing import Callable
from dora import Node

import cv2
import numpy as np
import torch

from dora import DoraStatus


class Operator:
"""
Infering object from images
"""

def __init__(self):
self.model = torch.hub.load("ultralytics/yolov5", "yolov5n")

def on_input(
self,
dora_input: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
"""Handle image

Args:
dora_input (dict): Dict containing the "id", "data", and "metadata"
send_output (Callable[[str, bytes]]): Function enabling sending output back to dora.
"""

frame = np.frombuffer(dora_input["data"], dtype="uint8")
frame = cv2.imdecode(frame, -1)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
results = self.model(frame) # includes NMS
arrays = np.array(results.xyxy[0].cpu()).tobytes()
send_output("bbox", arrays, dora_input["metadata"])
return DoraStatus.CONTINUE
model = torch.hub.load("ultralytics/yolov5", "yolov5n")

node = Node()

for event in node:
match event["type"]:
case "INPUT":
match event["id"]:
case "image":
print("received image input")
frame = np.frombuffer(event["data"], dtype="uint8")
frame = cv2.imdecode(frame, -1)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
results = model(frame) # includes NMS
arrays = np.array(results.xyxy[0].cpu()).tobytes()

node.send_output("bbox", arrays, event["metadata"])
case other:
print("ignoring unexpected input:", other)
case "STOP":
print("received stop")
case other:
print("received unexpected event:", other)

+ 26
- 5
examples/python-dataflow/plot.py

@@ -1,16 +1,19 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
from typing import Callable
from dora import Node
from dora import DoraStatus

import cv2
import numpy as np
from utils import LABELS

from dora import DoraStatus

CI = os.environ.get("CI")

font = cv2.FONT_HERSHEY_SIMPLEX
class Operator:
class Plotter:
"""
Plot image and bounding box
"""
@@ -22,7 +25,6 @@ class Operator:
def on_input(
self,
dora_input: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
"""
Put image and bounding box on cv2 window.
@@ -30,7 +32,6 @@ class Operator:
Args:
dora_input["id"] (str): Id of the dora_input declared in the yaml configuration
dora_input["data"] (bytes): Bytes message of the dora_input
send_output (Callable[[str, bytes]]): Function enabling sending output back to dora.
"""
if dora_input["id"] == "image":
frame = np.frombuffer(dora_input["data"], dtype="uint8")
@@ -74,3 +75,23 @@ class Operator:
return DoraStatus.STOP

return DoraStatus.CONTINUE



plotter = Plotter()
node = Node()

for event in node:
match event["type"]:
case "INPUT":
status = plotter.on_input(event)
match status:
case DoraStatus.CONTINUE:
pass
case DoraStatus.STOP:
print("plotter returned stop status")
break
case "STOP":
print("received stop")
case other:
print("received unexpected event:", other)

+ 12
- 1
examples/python-dataflow/run.rs

@@ -3,11 +3,13 @@ use std::{env, path::Path};

#[tokio::main]
async fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing subscriber")?;

let root = Path::new(env!("CARGO_MANIFEST_DIR"));
std::env::set_current_dir(root.join(file!()).parent().unwrap())
.wrap_err("failed to set working dir")?;

build_package("dora-runtime").await?;
build_package("dora-daemon").await?;

run(root).await?;

@@ -33,3 +35,12 @@ async fn run(_root: &Path) -> eyre::Result<()> {
};
Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer().pretty();
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

+ 3
- 1
examples/python-dataflow/run.sh

@@ -1,3 +1,5 @@
set -e

python3 -m venv .env
. $(pwd)/.env/bin/activate
# Dev dependencies
@@ -10,4 +12,4 @@ cd ../../../examples/python-dataflow
pip install --upgrade pip
pip install -r requirements.txt

cargo run -p dora-coordinator -- --run-dataflow dataflow_without_webcam.yml
cargo run -p dora-daemon -- --run-dataflow dataflow_without_webcam.yml

+ 12
- 4
examples/python-dataflow/webcam.py

@@ -15,9 +15,17 @@ start = time.time()
# Run for 10 seconds
while time.time() - start < 10:
# Wait for the next event
node.next()
ret, frame = video_capture.read()
if ret:
node.send_output("image", cv2.imencode(".jpg", frame)[1].tobytes())
event = node.next()
match event["type"]:
case "INPUT":
ret, frame = video_capture.read()
if ret:
node.send_output("image", cv2.imencode(".jpg", frame)[1].tobytes())
case "STOP":
print("received stop")
break
case other:
print("received unexpected event:", other)
break

video_capture.release()

+ 1
- 0
examples/python-operator-dataflow/.gitignore

@@ -0,0 +1 @@
*.pt

+ 33
- 0
examples/python-operator-dataflow/README.md

@@ -0,0 +1,33 @@
# Python Operator Dataflow Example

This example shows how to create and connect dora operators and custom nodes in Python.

## Overview

The [`dataflow.yml`](./dataflow.yml) defines a simple dataflow graph with the following three nodes:

- a webcam node that connects to your webcam and feeds the dataflow with webcam frames as JPEG-compressed byte arrays.
- an object detection node that applies YOLOv5 to the webcam image. The model is loaded from PyTorch Hub. The output contains the bounding box, confidence, and class of each detected object. More information: https://pytorch.org/hub/ultralytics_yolov5/
- a window plotting node that overlays the YOLOv5 bounding boxes on the webcam image.

## Getting started

```bash
cargo run --example python-operator-dataflow
```

## Installation

To install, you should run the `install.sh` script.

```bash
install.sh
```

## Run the dataflow standalone

- Start the `dora-daemon`, passing the path to the dataflow file:

```
../../target/release/dora-daemon --run-dataflow dataflow.yml
```

+ 27
- 0
examples/python-operator-dataflow/dataflow.yml

@@ -0,0 +1,27 @@
communication:
zenoh:
prefix: /example-python-dataflow

nodes:
- id: webcam
custom:
source: webcam.py
inputs:
tick: dora/timer/millis/100
outputs:
- image

- id: object_detection
operator:
python: object_detection.py
inputs:
image: webcam/image
outputs:
- bbox

- id: plot
operator:
python: plot.py
inputs:
image: webcam/image
bbox: object_detection/bbox

+ 27
- 0
examples/python-operator-dataflow/dataflow_without_webcam.yml

@@ -0,0 +1,27 @@
communication:
zenoh:
prefix: /example-python-no-webcam-dataflow

nodes:
- id: no_webcam
custom:
source: ./no_webcam.py
inputs:
tick: dora/timer/millis/100
outputs:
- image

- id: object_detection
operator:
python: object_detection.py
inputs:
image: no_webcam/image
outputs:
- bbox

- id: plot
operator:
python: plot.py
inputs:
image: no_webcam/image
bbox: object_detection/bbox

+ 31
- 0
examples/python-operator-dataflow/no_webcam.py

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import time
import urllib.request

import cv2
import numpy as np
from dora import Node

print("Hello from no_webcam.py")


req = urllib.request.urlopen("https://ultralytics.com/images/zidane.jpg")

arr = np.asarray(bytearray(req.read()), dtype=np.uint8)
node = Node()

start = time.time()

while time.time() - start < 20:
# Wait for the next event
event = node.next()
match event["type"]:
case "INPUT":
print("received input", event["id"])
node.send_output("image", arr.tobytes())
case "STOP":
print("received stop")
case other:
print("received unexpected event:", other)

+ 51
- 0
examples/python-operator-dataflow/object_detection.py

@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from enum import Enum
from typing import Callable

import cv2
import numpy as np
import torch


class DoraStatus(Enum):
CONTINUE = 0
STOP = 1


class Operator:
"""
Inferring objects from images
"""

def __init__(self):
self.model = torch.hub.load("ultralytics/yolov5", "yolov5n")

def on_event(
self,
dora_event: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
if dora_event["type"] == "INPUT":
return self.on_input(dora_event, send_output)
return DoraStatus.CONTINUE

def on_input(
self,
dora_input: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
"""Handle image
Args:
dora_input (dict): Dict containing the "id", "data", and "metadata"
send_output (Callable[[str, bytes]]): Function enabling sending output back to dora.
"""

frame = np.frombuffer(dora_input["data"], dtype="uint8")
frame = cv2.imdecode(frame, -1)
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
results = self.model(frame) # includes NMS
arrays = np.array(results.xyxy[0].cpu()).tobytes()
send_output("bbox", arrays, dora_input["metadata"])
return DoraStatus.CONTINUE

+ 104
- 0
examples/python-operator-dataflow/plot.py

@@ -0,0 +1,104 @@
import os
from enum import Enum
from typing import Callable

import cv2
import numpy as np

from utils import LABELS

CI = os.environ.get("CI")

font = cv2.FONT_HERSHEY_SIMPLEX


class DoraStatus(Enum):
CONTINUE = 0
STOP = 1


class Operator:
"""
Plot image and bounding box
"""

def __init__(self):
self.image = []
self.bboxs = []
self.bounding_box_messages = 0
self.image_messages = 0

def on_event(
self,
dora_event: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
if dora_event["type"] == "INPUT":
return self.on_input(dora_event, send_output)
return DoraStatus.CONTINUE

def on_input(
self,
dora_input: dict,
send_output: Callable[[str, bytes], None],
) -> DoraStatus:
"""
Put image and bounding box on cv2 window.

Args:
dora_input["id"] (str): Id of the dora_input declared in the yaml configuration
dora_input["data"] (bytes): Bytes message of the dora_input
send_output (Callable[[str, bytes]]): Function enabling sending output back to dora.
"""
if dora_input["id"] == "image":
frame = np.frombuffer(dora_input["data"], dtype="uint8")
frame = cv2.imdecode(frame, -1)
self.image = frame

self.image_messages += 1
print("received " + str(self.image_messages) + " images")

elif dora_input["id"] == "bbox" and len(self.image) != 0:
bboxs = np.frombuffer(dora_input["data"], dtype="float32")
self.bboxs = np.reshape(bboxs, (-1, 6))

self.bounding_box_messages += 1
print("received " + str(self.bounding_box_messages) + " bounding boxes")

for bbox in self.bboxs:
[
min_x,
min_y,
max_x,
max_y,
confidence,
label,
] = bbox
cv2.rectangle(
self.image,
(int(min_x), int(min_y)),
(int(max_x), int(max_y)),
(0, 255, 0),
2,
)

cv2.putText(
self.image,
LABELS[int(label)] + f", {confidence:0.2f}",
(int(max_x), int(max_y)),
font,
0.75,
(0, 255, 0),
2,
1,
)

if CI != "true":
cv2.imshow("frame", self.image)
if cv2.waitKey(1) & 0xFF == ord("q"):
return DoraStatus.STOP

return DoraStatus.CONTINUE

def __del__(self):
cv2.destroyAllWindows()

+ 45
- 0
examples/python-operator-dataflow/requirements.txt

@@ -0,0 +1,45 @@
# YOLOv5 requirements
# Usage: pip install -r requirements.txt

# Base ----------------------------------------
matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.1
Pillow>=7.1.2
PyYAML>=5.3.1
requests>=2.23.0
scipy>=1.4.1
torch>=1.7.0
torchvision>=0.8.1
tqdm>=4.64.0
protobuf<=3.20.1 # https://github.com/ultralytics/yolov5/issues/8012

# Logging -------------------------------------
tensorboard>=2.4.1
# wandb
# clearml

# Plotting ------------------------------------
pandas>=1.1.4
seaborn>=0.11.0

# Export --------------------------------------
# coremltools>=5.2 # CoreML export
# onnx>=1.9.0 # ONNX export
# onnx-simplifier>=0.4.1 # ONNX simplifier
# nvidia-pyindex # TensorRT export
# nvidia-tensorrt # TensorRT export
# scikit-learn==0.19.2 # CoreML quantization
# tensorflow>=2.4.1 # TFLite export (or tensorflow-cpu, tensorflow-aarch64)
# tensorflowjs>=3.9.0 # TF.js export
# openvino-dev # OpenVINO export

# Extras --------------------------------------
ipython # interactive notebook
psutil # system utilization
thop>=0.1.1 # FLOPs computation
# albumentations>=1.0.3
# pycocotools>=2.0 # COCO mAP
# roboflow

opencv-python>=4.1.1

+ 46
- 0
examples/python-operator-dataflow/run.rs

@@ -0,0 +1,46 @@
use eyre::{bail, Context};
use std::{env, path::Path};

#[tokio::main]
async fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing subscriber")?;

let root = Path::new(env!("CARGO_MANIFEST_DIR"));
std::env::set_current_dir(root.join(file!()).parent().unwrap())
.wrap_err("failed to set working dir")?;

build_package("dora-daemon").await?;

run(root).await?;

Ok(())
}

async fn build_package(package: &str) -> eyre::Result<()> {
let cargo = std::env::var("CARGO").unwrap();
let mut cmd = tokio::process::Command::new(&cargo);
cmd.arg("build");
cmd.arg("--package").arg(package);
if !cmd.status().await?.success() {
bail!("failed to build {package}");
};
Ok(())
}

async fn run(_root: &Path) -> eyre::Result<()> {
let mut run = tokio::process::Command::new("sh");
run.arg("./run.sh");
if !run.status().await?.success() {
bail!("failed to run python example.");
};
Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer().pretty();
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

+ 15
- 0
examples/python-operator-dataflow/run.sh

@@ -0,0 +1,15 @@
set -e

python3 -m venv .env
. $(pwd)/.env/bin/activate
# Dev dependencies
pip install maturin
cd ../../apis/python/node
maturin develop
cd ../../../examples/python-operator-dataflow

# Dependencies
pip install --upgrade pip
pip install -r requirements.txt

cargo run -p dora-daemon -- --run-dataflow dataflow_without_webcam.yml

+ 82
- 0
examples/python-operator-dataflow/utils.py

@@ -0,0 +1,82 @@
LABELS = [
"ABC",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush",
]

+ 31
- 0
examples/python-operator-dataflow/webcam.py

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import time

import cv2
from dora import Node

node = Node()

video_capture = cv2.VideoCapture(0)

start = time.time()

# Run for 10 seconds
while time.time() - start < 10:
# Wait for the next event
event = node.next()
match event["type"]:
case "INPUT":
ret, frame = video_capture.read()
if ret:
node.send_output("image", cv2.imencode(".jpg", frame)[1].tobytes())
case "STOP":
print("received stop")
break
case other:
print("received unexpected event:", other)
break

video_capture.release()

+ 16
- 6
examples/rust-dataflow-url/run.rs

@@ -1,21 +1,20 @@
use eyre::{bail, Context};
use std::path::Path;
use tracing::metadata::LevelFilter;
use tracing_subscriber::Layer;

#[tokio::main]
async fn main() -> eyre::Result<()> {
set_up_tracing().wrap_err("failed to set up tracing")?;

let root = Path::new(env!("CARGO_MANIFEST_DIR"));
std::env::set_current_dir(root.join(file!()).parent().unwrap())
.wrap_err("failed to set working dir")?;

let dataflow = Path::new("dataflow.yml");
build_dataflow(dataflow).await?;
build_package("dora-runtime").await?;

dora_coordinator::run(dora_coordinator::Args {
run_dataflow: dataflow.to_owned().into(),
runtime: Some(root.join("target").join("debug").join("dora-runtime")),
})
.await?;
dora_daemon::Daemon::run_dataflow(dataflow, None).await?;

Ok(())
}
@@ -42,3 +41,14 @@ async fn build_package(package: &str) -> eyre::Result<()> {
};
Ok(())
}

fn set_up_tracing() -> eyre::Result<()> {
use tracing_subscriber::prelude::__tracing_subscriber_SubscriberExt;

let stdout_log = tracing_subscriber::fmt::layer()
.pretty()
.with_filter(LevelFilter::DEBUG);
let subscriber = tracing_subscriber::Registry::default().with(stdout_log);
tracing::subscriber::set_global_default(subscriber)
.context("failed to set tracing global subscriber")
}

+ 3
- 1
examples/rust-dataflow/dataflow.yml

@@ -2,13 +2,15 @@ communication:
zenoh:
prefix: /example-rust-dataflow

daemon_config: Tcp # or Shmem

nodes:
- id: rust-node
custom:
build: cargo build -p rust-dataflow-example-node
source: ../../target/debug/rust-dataflow-example-node
inputs:
tick: dora/timer/millis/300
tick: dora/timer/millis/10
outputs:
- random
- id: runtime-node


+ 1
- 1
examples/rust-dataflow/node/Cargo.toml

@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dora-node-api = { workspace = true, features = ["zenoh"] }
dora-node-api = { workspace = true }
eyre = "0.6.8"
futures = "0.3.21"
rand = "0.8.5"


+ 25
- 21
examples/rust-dataflow/node/src/main.rs

@@ -1,32 +1,36 @@
use dora_node_api::{self, dora_core::config::DataId, DoraNode};
use dora_node_api::{self, dora_core::config::DataId, DoraNode, Event};

fn main() -> eyre::Result<()> {
let output = DataId::from("random".to_owned());
println!("hello");

let mut operator = DoraNode::init_from_env()?;
let output = DataId::from("random".to_owned());

let inputs = operator.inputs()?;
let (mut node, mut events) = DoraNode::init_from_env()?;

for _ in 0..20 {
let input = match inputs.recv() {
Ok(input) => input,
Err(_) => break,
for i in 0..100 {
let event = match events.recv() {
Some(input) => input,
None => break,
};

match input.id.as_str() {
"tick" => {
let random: u64 = rand::random();
let data: &[u8] = &random.to_le_bytes();
operator.send_output(
&output,
input.metadata().parameters.clone(),
data.len(),
|out| {
match event {
Event::Input {
id,
metadata,
data: _,
} => match id.as_str() {
"tick" => {
let random: u64 = rand::random();
println!("tick {i}, sending {random:#x}");
let data: &[u8] = &random.to_le_bytes();
node.send_output(output.clone(), metadata.parameters, data.len(), |out| {
out.copy_from_slice(data);
},
)?;
}
other => eprintln!("Ignoring unexpected input `{other}`"),
})?;
}
other => eprintln!("Ignoring unexpected input `{other}`"),
},
Event::Stop => println!("Received manual stop"),
other => eprintln!("Received unexpected input: {other:?}"),
}
}
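Since old and new lines are interleaved in the hunk above, a clean reconstruction of the new event-based node API may help. This sketch is assembled from the added lines (`init_from_env` now returns the node plus an event stream) and is not a verbatim copy of the example:

```rust
use dora_node_api::{self, dora_core::config::DataId, DoraNode, Event};

// Event-loop sketch: receive events, answer `tick` inputs with a random
// value on the `random` output, and stop when the stream ends.
fn main() -> eyre::Result<()> {
    let output = DataId::from("random".to_owned());
    let (mut node, mut events) = DoraNode::init_from_env()?;

    for i in 0..100 {
        let Some(event) = events.recv() else { break };
        match event {
            Event::Input { id, metadata, data: _ } if id.as_str() == "tick" => {
                let random: u64 = rand::random();
                println!("tick {i}, sending {random:#x}");
                let data = random.to_le_bytes();
                node.send_output(output.clone(), metadata.parameters, data.len(), |out| {
                    out.copy_from_slice(&data);
                })?;
            }
            Event::Input { id, .. } => eprintln!("ignoring unexpected input `{id}`"),
            Event::Stop => println!("received stop"),
            other => eprintln!("received unexpected event: {other:?}"),
        }
    }
    Ok(())
}
```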



Some files were not shown because too many files changed in this diff
