Compare commits

...

17 changed files with 1277 additions and 143 deletions
Unified View
  1. +434
    -34
      Cargo.lock
  2. +1
    -0
      Cargo.toml
  3. +1
    -0
      binaries/cli/src/command/coordinator.rs
  4. +1
    -0
      binaries/cli/src/command/daemon.rs
  5. +16
    -16
      binaries/cli/src/command/mod.rs
  6. +1
    -0
      binaries/cli/src/command/run.rs
  7. +8
    -8
      binaries/cli/src/command/start/mod.rs
  8. +1
    -1
      binaries/cli/src/lib.rs
  9. +78
    -0
      examples/openai-realtime/README.md
  10. +54
    -0
      examples/openai-realtime/whisper-template-metal.yml
  11. +77
    -35
      node-hub/dora-distil-whisper/dora_distil_whisper/main.py
  12. +20
    -17
      node-hub/dora-kokoro-tts/dora_kokoro_tts/main.py
  13. +5
    -1
      node-hub/dora-kokoro-tts/pyproject.toml
  14. +31
    -0
      node-hub/dora-openai-websocket/Cargo.toml
  15. +513
    -0
      node-hub/dora-openai-websocket/src/main.rs
  16. +21
    -27
      node-hub/dora-qwen/dora_qwen/main.py
  17. +15
    -4
      node-hub/dora-vad/dora_vad/main.py

+ 434
- 34
Cargo.lock View File

@@ -264,6 +264,12 @@ dependencies = [
"libc", "libc",
] ]


[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"

[[package]] [[package]]
name = "ansi_colours" name = "ansi_colours"
version = "1.2.3" version = "1.2.3"
@@ -666,7 +672,7 @@ dependencies = [
"enumflags2", "enumflags2",
"futures-channel", "futures-channel",
"futures-util", "futures-util",
"rand 0.9.1",
"rand 0.9.2",
"raw-window-handle 0.6.2", "raw-window-handle 0.6.2",
"serde", "serde",
"serde_repr", "serde_repr",
@@ -713,6 +719,29 @@ dependencies = [
"syn 2.0.101", "syn 2.0.101",
] ]


[[package]]
name = "assert2"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6c710e60d14b07d8f42d0e702b16120865eea39edb751e75cd6bf401d18f14"
dependencies = [
"assert2-macros",
"diff",
"yansi",
]

[[package]]
name = "assert2-macros"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9008cbbba9e1d655538870b91fd93814bd82e6968f27788fc734375120ac6f57"
dependencies = [
"proc-macro2",
"quote",
"rustc_version",
"syn 2.0.101",
]

[[package]] [[package]]
name = "assert_matches" name = "assert_matches"
version = "1.5.0" version = "1.5.0"
@@ -1179,14 +1208,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"axum-core",
"axum-core 0.4.5",
"bytes", "bytes",
"futures-util", "futures-util",
"http 1.3.1", "http 1.3.1",
"http-body 1.0.1", "http-body 1.0.1",
"http-body-util", "http-body-util",
"itoa", "itoa",
"matchit",
"matchit 0.7.3",
"memchr", "memchr",
"mime", "mime",
"percent-encoding", "percent-encoding",
@@ -1199,6 +1228,40 @@ dependencies = [
"tower-service", "tower-service",
] ]


[[package]]
name = "axum"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
dependencies = [
"axum-core 0.5.2",
"bytes",
"form_urlencoded",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"hyper 1.6.0",
"hyper-util",
"itoa",
"matchit 0.8.4",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]

[[package]] [[package]]
name = "axum-core" name = "axum-core"
version = "0.4.5" version = "0.4.5"
@@ -1219,6 +1282,26 @@ dependencies = [
"tower-service", "tower-service",
] ]


[[package]]
name = "axum-core"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
dependencies = [
"bytes",
"futures-core",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]

[[package]] [[package]]
name = "az" name = "az"
version = "1.2.1" version = "1.2.1"
@@ -1240,6 +1323,12 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]


[[package]]
name = "base"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a956d500c2380c818e09d3d7c79ba4a1d7fc6354464f1fceaa5705483a29930"

[[package]] [[package]]
name = "base64" name = "base64"
version = "0.13.1" version = "0.13.1"
@@ -1640,7 +1729,7 @@ dependencies = [
"metal 0.27.0", "metal 0.27.0",
"num-traits", "num-traits",
"num_cpus", "num_cpus",
"rand 0.9.1",
"rand 0.9.2",
"rand_distr", "rand_distr",
"rayon", "rayon",
"safetensors", "safetensors",
@@ -1732,6 +1821,12 @@ dependencies = [
"thiserror 1.0.69", "thiserror 1.0.69",
] ]


[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.17" version = "1.2.17"
@@ -1867,6 +1962,33 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901"


[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]

[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"

[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]

[[package]] [[package]]
name = "cipher" name = "cipher"
version = "0.4.4" version = "0.4.4"
@@ -2303,6 +2425,42 @@ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
] ]


[[package]]
name = "criterion"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
dependencies = [
"anes",
"atty",
"cast",
"ciborium",
"clap 3.2.25",
"criterion-plot",
"itertools 0.10.5",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]

[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]

[[package]] [[package]]
name = "crossbeam" name = "crossbeam"
version = "0.8.4" version = "0.8.4"
@@ -2809,6 +2967,12 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c" checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"


[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"

[[package]] [[package]]
name = "digest" name = "digest"
version = "0.10.7" version = "0.10.7"
@@ -3266,6 +3430,35 @@ dependencies = [
"uuid 1.16.0", "uuid 1.16.0",
] ]


[[package]]
name = "dora-openai-websocket"
version = "0.1.0"
dependencies = [
"anyhow",
"assert2",
"axum 0.8.4",
"base",
"base64 0.22.1",
"bytes",
"criterion",
"dora-cli",
"dora-node-api",
"fastwebsockets",
"futures-concurrency",
"futures-util",
"http-body-util",
"hyper 1.6.0",
"hyper-util",
"rand 0.9.2",
"rustls-pemfile 1.0.4",
"serde",
"serde_json",
"tokio",
"tokio-rustls 0.24.1",
"trybuild",
"webpki-roots 0.23.1",
]

[[package]] [[package]]
name = "dora-operator-api" name = "dora-operator-api"
version = "0.3.12" version = "0.3.12"
@@ -3362,7 +3555,7 @@ dependencies = [
"ndarray 0.15.6", "ndarray 0.15.6",
"pinyin", "pinyin",
"pyo3", "pyo3",
"rand 0.9.1",
"rand 0.9.2",
"rerun", "rerun",
"tokio", "tokio",
] ]
@@ -4161,6 +4354,26 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"


[[package]]
name = "fastwebsockets"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "305d3ba574508e27190906d11707dad683e0494e6b85eae9b044cb2734a5e422"
dependencies = [
"base64 0.21.7",
"bytes",
"http-body-util",
"hyper 1.6.0",
"hyper-util",
"pin-project",
"rand 0.8.5",
"sha1",
"simdutf8",
"thiserror 1.0.69",
"tokio",
"utf-8",
]

[[package]] [[package]]
name = "fdeflate" name = "fdeflate"
version = "0.3.7" version = "0.3.7"
@@ -4272,7 +4485,7 @@ dependencies = [
"cudarc", "cudarc",
"half", "half",
"num-traits", "num-traits",
"rand 0.9.1",
"rand 0.9.2",
"rand_distr", "rand_distr",
] ]


@@ -5052,7 +5265,7 @@ dependencies = [
"cfg-if 1.0.0", "cfg-if 1.0.0",
"crunchy", "crunchy",
"num-traits", "num-traits",
"rand 0.9.1",
"rand 0.9.2",
"rand_distr", "rand_distr",
] ]


@@ -5392,7 +5605,7 @@ dependencies = [
"rustls 0.23.25", "rustls 0.23.25",
"rustls-pki-types", "rustls-pki-types",
"tokio", "tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tower-service", "tower-service",
"webpki-roots 0.26.8", "webpki-roots 0.26.8",
] ]
@@ -6575,6 +6788,12 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"


[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"

[[package]] [[package]]
name = "matrixmultiply" name = "matrixmultiply"
version = "0.3.9" version = "0.3.9"
@@ -6858,7 +7077,7 @@ dependencies = [
"image", "image",
"indexmap 2.8.0", "indexmap 2.8.0",
"mistralrs-core", "mistralrs-core",
"rand 0.9.1",
"rand 0.9.2",
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",
@@ -6910,7 +7129,7 @@ dependencies = [
"objc", "objc",
"once_cell", "once_cell",
"radix_trie", "radix_trie",
"rand 0.9.1",
"rand 0.9.2",
"rand_isaac", "rand_isaac",
"rayon", "rayon",
"regex", "regex",
@@ -6931,7 +7150,7 @@ dependencies = [
"tokio", "tokio",
"tokio-rayon", "tokio-rayon",
"toktrie_hf_tokenizers", "toktrie_hf_tokenizers",
"toml",
"toml 0.8.20",
"tqdm", "tqdm",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
@@ -7893,6 +8112,12 @@ dependencies = [
"pkg-config", "pkg-config",
] ]


[[package]]
name = "oorandom"
version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"

[[package]] [[package]]
name = "openssl-probe" name = "openssl-probe"
version = "0.1.6" version = "0.1.6"
@@ -8081,7 +8306,7 @@ dependencies = [
"glob", "glob",
"opentelemetry 0.29.1", "opentelemetry 0.29.1",
"percent-encoding", "percent-encoding",
"rand 0.9.1",
"rand 0.9.2",
"serde_json", "serde_json",
"thiserror 2.0.12", "thiserror 2.0.12",
"tokio", "tokio",
@@ -8531,6 +8756,34 @@ dependencies = [
"time", "time",
] ]


[[package]]
name = "plotters"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]

[[package]]
name = "plotters-backend"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"

[[package]]
name = "plotters-svg"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
dependencies = [
"plotters-backend",
]

[[package]] [[package]]
name = "ply-rs" name = "ply-rs"
version = "0.1.3" version = "0.1.3"
@@ -9161,7 +9414,7 @@ checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc"
dependencies = [ dependencies = [
"bytes", "bytes",
"getrandom 0.3.2", "getrandom 0.3.2",
"rand 0.9.1",
"rand 0.9.2",
"ring 0.17.14", "ring 0.17.14",
"rustc-hash 2.1.1", "rustc-hash 2.1.1",
"rustls 0.23.25", "rustls 0.23.25",
@@ -9261,9 +9514,9 @@ dependencies = [


[[package]] [[package]]
name = "rand" name = "rand"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [ dependencies = [
"rand_chacha 0.9.0", "rand_chacha 0.9.0",
"rand_core 0.9.3", "rand_core 0.9.3",
@@ -9314,7 +9567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
dependencies = [ dependencies = [
"num-traits", "num-traits",
"rand 0.9.1",
"rand 0.9.2",
] ]


[[package]] [[package]]
@@ -9370,7 +9623,7 @@ dependencies = [
"simd_helpers", "simd_helpers",
"system-deps", "system-deps",
"thiserror 1.0.69", "thiserror 1.0.69",
"toml",
"toml 0.8.20",
"v_frame", "v_frame",
"y4m", "y4m",
] ]
@@ -10508,7 +10761,7 @@ dependencies = [
"serde", "serde",
"syn 2.0.101", "syn 2.0.101",
"tempfile", "tempfile",
"toml",
"toml 0.8.20",
"unindent", "unindent",
"xshell", "xshell",
] ]
@@ -11260,7 +11513,7 @@ dependencies = [
"sync_wrapper", "sync_wrapper",
"system-configuration", "system-configuration",
"tokio", "tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tokio-util", "tokio-util",
"tower 0.5.2", "tower 0.5.2",
"tower-service", "tower-service",
@@ -11660,7 +11913,7 @@ dependencies = [
"num-derive", "num-derive",
"num-traits", "num-traits",
"paste", "paste",
"rand 0.9.1",
"rand 0.9.2",
"serde", "serde",
"serde_repr", "serde_repr",
"socket2 0.5.8", "socket2 0.5.8",
@@ -11731,6 +11984,18 @@ dependencies = [
"webpki", "webpki",
] ]


[[package]]
name = "rustls"
version = "0.21.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
dependencies = [
"log",
"ring 0.17.14",
"rustls-webpki 0.101.7",
"sct",
]

[[package]] [[package]]
name = "rustls" name = "rustls"
version = "0.23.25" version = "0.23.25"
@@ -11824,6 +12089,26 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"


[[package]]
name = "rustls-webpki"
version = "0.100.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6a5fc258f1c1276dfe3016516945546e2d5383911efc0fc4f1cdc5df3a4ae3"
dependencies = [
"ring 0.16.20",
"untrusted 0.7.1",
]

[[package]]
name = "rustls-webpki"
version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring 0.17.14",
"untrusted 0.9.0",
]

[[package]] [[package]]
name = "rustls-webpki" name = "rustls-webpki"
version = "0.102.8" version = "0.102.8"
@@ -12228,9 +12513,9 @@ dependencies = [


[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.140"
version = "1.0.141"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
dependencies = [ dependencies = [
"indexmap 2.8.0", "indexmap 2.8.0",
"itoa", "itoa",
@@ -12239,6 +12524,16 @@ dependencies = [
"serde", "serde",
] ]


[[package]]
name = "serde_path_to_error"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a"
dependencies = [
"itoa",
"serde",
]

[[package]] [[package]]
name = "serde_plain" name = "serde_plain"
version = "1.0.2" version = "1.0.2"
@@ -12268,6 +12563,15 @@ dependencies = [
"serde", "serde",
] ]


[[package]]
name = "serde_spanned"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83"
dependencies = [
"serde",
]

[[package]] [[package]]
name = "serde_urlencoded" name = "serde_urlencoded"
version = "0.7.1" version = "0.7.1"
@@ -13221,7 +13525,7 @@ dependencies = [
"cfg-expr", "cfg-expr",
"heck 0.5.0", "heck 0.5.0",
"pkg-config", "pkg-config",
"toml",
"toml 0.8.20",
"version-compare", "version-compare",
] ]


@@ -13257,6 +13561,12 @@ version = "0.12.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"


[[package]]
name = "target-triple"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ac9aa371f599d22256307c24a9d748c041e548cbf599f35d890f9d365361790"

[[package]] [[package]]
name = "tempfile" name = "tempfile"
version = "3.19.1" version = "3.19.1"
@@ -13515,6 +13825,16 @@ dependencies = [
"zerovec", "zerovec",
] ]


[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]

[[package]] [[package]]
name = "tinyvec" name = "tinyvec"
version = "1.9.0" version = "1.9.0"
@@ -13540,7 +13860,7 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
"thiserror 2.0.12", "thiserror 2.0.12",
"tokio", "tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
] ]


[[package]] [[package]]
@@ -13638,6 +13958,16 @@ dependencies = [
"tokio", "tokio",
] ]


[[package]]
name = "tokio-rustls"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.12",
"tokio",
]

[[package]] [[package]]
name = "tokio-rustls" name = "tokio-rustls"
version = "0.26.2" version = "0.26.2"
@@ -13720,11 +14050,26 @@ checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
dependencies = [ dependencies = [
"indexmap 2.8.0", "indexmap 2.8.0",
"serde", "serde",
"serde_spanned",
"toml_datetime",
"serde_spanned 0.6.8",
"toml_datetime 0.6.8",
"toml_edit", "toml_edit",
] ]


[[package]]
name = "toml"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41ae868b5a0f67631c14589f7e250c1ea2c574ee5ba21c6c8dd4b1485705a5a1"
dependencies = [
"indexmap 2.8.0",
"serde",
"serde_spanned 1.0.0",
"toml_datetime 0.7.0",
"toml_parser",
"toml_writer",
"winnow",
]

[[package]] [[package]]
name = "toml_datetime" name = "toml_datetime"
version = "0.6.8" version = "0.6.8"
@@ -13734,6 +14079,15 @@ dependencies = [
"serde", "serde",
] ]


[[package]]
name = "toml_datetime"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3"
dependencies = [
"serde",
]

[[package]] [[package]]
name = "toml_edit" name = "toml_edit"
version = "0.22.24" version = "0.22.24"
@@ -13742,11 +14096,26 @@ checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
dependencies = [ dependencies = [
"indexmap 2.8.0", "indexmap 2.8.0",
"serde", "serde",
"serde_spanned",
"toml_datetime",
"serde_spanned 0.6.8",
"toml_datetime 0.6.8",
"winnow",
]

[[package]]
name = "toml_parser"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97200572db069e74c512a14117b296ba0a80a30123fbbb5aa1f4a348f639ca30"
dependencies = [
"winnow", "winnow",
] ]


[[package]]
name = "toml_writer"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64"

[[package]] [[package]]
name = "tonic" name = "tonic"
version = "0.12.3" version = "0.12.3"
@@ -13755,7 +14124,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum",
"axum 0.7.9",
"base64 0.22.1", "base64 0.22.1",
"bytes", "bytes",
"h2 0.4.8", "h2 0.4.8",
@@ -13772,7 +14141,7 @@ dependencies = [
"rustls-pemfile 2.2.0", "rustls-pemfile 2.2.0",
"socket2 0.5.8", "socket2 0.5.8",
"tokio", "tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tokio-stream", "tokio-stream",
"tower 0.4.13", "tower 0.4.13",
"tower-layer", "tower-layer",
@@ -13858,6 +14227,7 @@ dependencies = [
"tokio", "tokio",
"tower-layer", "tower-layer",
"tower-service", "tower-service",
"tracing",
] ]


[[package]] [[package]]
@@ -14005,6 +14375,21 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"


[[package]]
name = "trybuild"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65af40ad689f2527aebbd37a0a816aea88ff5f774ceabe99de5be02f2f91dae2"
dependencies = [
"glob",
"serde",
"serde_derive",
"serde_json",
"target-triple",
"termcolor",
"toml 0.9.4",
]

[[package]] [[package]]
name = "ttf-parser" name = "ttf-parser"
version = "0.25.1" version = "0.25.1"
@@ -14352,7 +14737,7 @@ checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
dependencies = [ dependencies = [
"getrandom 0.3.2", "getrandom 0.3.2",
"js-sys", "js-sys",
"rand 0.9.1",
"rand 0.9.2",
"serde", "serde",
"uuid-macro-internal", "uuid-macro-internal",
"wasm-bindgen", "wasm-bindgen",
@@ -14810,6 +15195,15 @@ dependencies = [
"webpki", "webpki",
] ]


[[package]]
name = "webpki-roots"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki 0.100.3",
]

[[package]] [[package]]
name = "webpki-roots" name = "webpki-roots"
version = "0.26.8" version = "0.26.8"
@@ -15676,9 +16070,9 @@ dependencies = [


[[package]] [[package]]
name = "winnow" name = "winnow"
version = "0.7.4"
version = "0.7.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36"
checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]
@@ -15888,6 +16282,12 @@ dependencies = [
"linked-hash-map", "linked-hash-map",
] ]


[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"

[[package]] [[package]]
name = "yoke" name = "yoke"
version = "0.7.5" version = "0.7.5"
@@ -16549,7 +16949,7 @@ dependencies = [
"time", "time",
"tls-listener", "tls-listener",
"tokio", "tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tokio-util", "tokio-util",
"tracing", "tracing",
"webpki-roots 0.26.8", "webpki-roots 0.26.8",


+ 1
- 0
Cargo.toml View File

@@ -34,6 +34,7 @@ members = [
"node-hub/dora-rerun", "node-hub/dora-rerun",
"node-hub/terminal-print", "node-hub/terminal-print",
"node-hub/openai-proxy-server", "node-hub/openai-proxy-server",
"node-hub/dora-openai-websocket",
"node-hub/dora-kit-car", "node-hub/dora-kit-car",
"node-hub/dora-object-to-pose", "node-hub/dora-object-to-pose",
"node-hub/dora-mistral-rs", "node-hub/dora-mistral-rs",


+ 1
- 0
binaries/cli/src/command/coordinator.rs View File

@@ -9,6 +9,7 @@ use dora_tracing::TracingBuilder;
use eyre::Context; use eyre::Context;
use std::net::{IpAddr, SocketAddr}; use std::net::{IpAddr, SocketAddr};
use tokio::runtime::Builder; use tokio::runtime::Builder;
#[cfg(feature = "tracing")]
use tracing::level_filters::LevelFilter; use tracing::level_filters::LevelFilter;


#[derive(Debug, clap::Args)] #[derive(Debug, clap::Args)]


+ 1
- 0
binaries/cli/src/command/daemon.rs View File

@@ -14,6 +14,7 @@ use std::{
path::PathBuf, path::PathBuf,
}; };
use tokio::runtime::Builder; use tokio::runtime::Builder;
#[cfg(feature = "tracing")]
use tracing::level_filters::LevelFilter; use tracing::level_filters::LevelFilter;


#[derive(Debug, clap::Args)] #[derive(Debug, clap::Args)]


+ 16
- 16
binaries/cli/src/command/mod.rs View File

@@ -16,22 +16,22 @@ mod up;


pub use run::run_func; pub use run::run_func;


use build::Build;
use check::Check;
use coordinator::Coordinator;
use daemon::Daemon;
use destroy::Destroy;
use eyre::Context;
use graph::Graph;
use list::ListArgs;
use logs::LogsArgs;
use new::NewArgs;
use run::Run;
use runtime::Runtime;
use self_::SelfSubCommand;
use start::Start;
use stop::Stop;
use up::Up;
pub use build::Build;
pub use check::Check;
pub use coordinator::Coordinator;
pub use daemon::Daemon;
pub use destroy::Destroy;
pub use eyre::Context;
pub use graph::Graph;
pub use list::ListArgs;
pub use logs::LogsArgs;
pub use new::NewArgs;
pub use run::Run;
pub use runtime::Runtime;
pub use self_::SelfSubCommand;
pub use start::Start;
pub use stop::Stop;
pub use up::Up;


/// dora-rs cli client /// dora-rs cli client
#[derive(Debug, clap::Subcommand)] #[derive(Debug, clap::Subcommand)]


+ 1
- 0
binaries/cli/src/command/run.rs View File

@@ -12,6 +12,7 @@ use crate::{
session::DataflowSession, session::DataflowSession,
}; };
use dora_daemon::{flume, Daemon, LogDestination}; use dora_daemon::{flume, Daemon, LogDestination};
#[cfg(feature = "tracing")]
use dora_tracing::TracingBuilder; use dora_tracing::TracingBuilder;
use eyre::Context; use eyre::Context;
use tokio::runtime::Builder; use tokio::runtime::Builder;


+ 8
- 8
binaries/cli/src/command/start/mod.rs View File

@@ -31,28 +31,28 @@ mod attach;
pub struct Start { pub struct Start {
/// Path to the dataflow descriptor file /// Path to the dataflow descriptor file
#[clap(value_name = "PATH")] #[clap(value_name = "PATH")]
dataflow: String,
pub dataflow: String,
/// Assign a name to the dataflow /// Assign a name to the dataflow
#[clap(long)] #[clap(long)]
name: Option<String>,
pub name: Option<String>,
/// Address of the dora coordinator /// Address of the dora coordinator
#[clap(long, value_name = "IP", default_value_t = LOCALHOST)] #[clap(long, value_name = "IP", default_value_t = LOCALHOST)]
coordinator_addr: IpAddr,
pub coordinator_addr: IpAddr,
/// Port number of the coordinator control server /// Port number of the coordinator control server
#[clap(long, value_name = "PORT", default_value_t = DORA_COORDINATOR_PORT_CONTROL_DEFAULT)] #[clap(long, value_name = "PORT", default_value_t = DORA_COORDINATOR_PORT_CONTROL_DEFAULT)]
coordinator_port: u16,
pub coordinator_port: u16,
/// Attach to the dataflow and wait for its completion /// Attach to the dataflow and wait for its completion
#[clap(long, action)] #[clap(long, action)]
attach: bool,
pub attach: bool,
/// Run the dataflow in background /// Run the dataflow in background
#[clap(long, action)] #[clap(long, action)]
detach: bool,
pub detach: bool,
/// Enable hot reloading (Python only) /// Enable hot reloading (Python only)
#[clap(long, action)] #[clap(long, action)]
hot_reload: bool,
pub hot_reload: bool,
// Use UV to run nodes. // Use UV to run nodes.
#[clap(long, action)] #[clap(long, action)]
uv: bool,
pub uv: bool,
} }


impl Executable for Start { impl Executable for Start {


+ 1
- 1
binaries/cli/src/lib.rs View File

@@ -5,7 +5,7 @@ use std::{
path::PathBuf, path::PathBuf,
}; };


mod command;
pub mod command;
mod common; mod common;
mod formatting; mod formatting;
pub mod output; pub mod output;


+ 78
- 0
examples/openai-realtime/README.md View File

@@ -0,0 +1,78 @@
# Dora-OpenAI-Realtime (ROOT Repo)

## Front End

### Build Client

```bash
git clone git@github.com:haixuanTao/moly.git --branch dora-backend-support
cd moly
cargo build --release
```

### Run Client

```bash
cd moly
cargo run -r
```

## Server

### Build server

```bash
uv venv --seed -p 3.11
source .venv/bin/activate
uv pip install dora-rs-cli dora-rs
dora build whisper-template-metal.yml --uv ## very long process
```

### Run server

```bash
source .venv/bin/activate
dora up
cargo run --release -p dora-openai-websocket
```

## On finish

```bash
dora destroy
```

## GUI

- Go to MolyServer Tab
- Add a custom Provider
- In API Host, use:

- Name: dora-websocket
- API Host: ws://127.0.0.1:8123
- Type: OpenAI Realtime

- Then go to Chat Tab
- New Chat
- ( Make sure the server is running with: `cargo run --release -p dora-openai-websocket`)
- On bottom right, click on 🎧 icon.
> If nothing happens, the server could not be reached.
- Click on start
- Wait for the first AI greeting
- Start speaking!
- You should get an AI response!

### WIP: Moyoyo

## {Recommended} Install git-lfs

```bash
brew install git-lfs # MacOS
```

## Clone Moxin Voice Chat

```bash
git lfs install
git clone https://github.com/moxin-org/moxin-voice-chat.git
```

+ 54
- 0
examples/openai-realtime/whisper-template-metal.yml View File

@@ -0,0 +1,54 @@
nodes:
- id: NODE_ID
path: dynamic
inputs:
audio: tts/audio
text: stt/text
speech_started: stt/speech_started
outputs:
- audio
- text

- id: dora-vad
build: pip install -e ../../node-hub/dora-vad
path: dora-vad
inputs:
audio:
source: NODE_ID/audio
queue_size: 1000000
outputs:
- audio
env:
MIN_SPEECH_DURATION_MS: 1000
MIN_SILENCE_DURATION_MS: 1000
THRESHOLD: 0.5

- id: stt
build: pip install -e ../../node-hub/dora-distil-whisper
path: dora-distil-whisper
inputs:
audio: dora-vad/audio
outputs:
- text
- word
- speech_started

- id: llm
build: pip install -e ../../node-hub/dora-qwen
path: dora-qwen
inputs:
text: stt/text
text_to_audio: NODE_ID/text
outputs:
- text
env:
MODEL_NAME_OR_PATH: LLM_ID
MODEL_FILE_PATTERN: "*[qQ]6_[kK].[gG][gG][uU][fF]"

- id: tts
build: pip install -e ../../node-hub/dora-kokoro-tts
path: dora-kokoro-tts
inputs:
text: llm/text
outputs:
- audio

+ 77
- 35
node-hub/dora-distil-whisper/dora_distil_whisper/main.py View File

@@ -6,6 +6,7 @@ import sys
import time import time
from pathlib import Path from pathlib import Path


import numpy as np
import pyarrow as pa import pyarrow as pa
import torch import torch
from dora import Node from dora import Node
@@ -47,35 +48,6 @@ def remove_text_noise(text: str, text_noise="") -> str:
text_words = normalized_text.split() text_words = normalized_text.split()
noise_words = normalized_noise.split() noise_words = normalized_noise.split()


# Function to find and remove noise sequence flexibly
def remove_flexible(text_list, noise_list):
i = 0
while i <= len(text_list) - len(noise_list):
match = True
extra_words = 0
for j, noise_word in enumerate(noise_list):
if i + j + extra_words >= len(text_list):
match = False
break
# Allow skipping extra words in text_list
while (
i + j + extra_words < len(text_list)
and text_list[i + j + extra_words] != noise_word
):
extra_words += 1
if i + j + extra_words >= len(text_list):
match = False
break
if not match:
break
if match:
# Remove matched part
del text_list[i : i + len(noise_list) + extra_words]
i = max(0, i - len(noise_list)) # Adjust index after removal
else:
i += 1
return text_list

# Only remove parts of text_noise that are found in text # Only remove parts of text_noise that are found in text
cleaned_words = text_words[:] cleaned_words = text_words[:]
for noise_word in noise_words: for noise_word in noise_words:
@@ -124,7 +96,30 @@ def load_model():
BAD_SENTENCES = [ BAD_SENTENCES = [
"", "",
" so", " so",
" So.",
" So, let's go.",
" so so", " so so",
" What?",
" We'll see you next time.",
" I'll see you next time.",
" We're going to come back.",
" let's move on.",
" Here we go.",
" my",
" All right. Thank you.",
" That's what we're doing.",
" That's what I wanted to do.",
" I'll be back.",
" Hold this. Hold this.",
" Hold this one. Hold this one.",
" And we'll see you next time.",
" strength.",
" Length.",
" Let's go.",
" Let's do it.",
"You",
"You ",
" You",
"字幕", "字幕",
"字幕志愿", "字幕志愿",
"中文字幕", "中文字幕",
@@ -181,13 +176,29 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):


def main(): def main():
"""TODO: Add docstring.""" """TODO: Add docstring."""
node = Node()
text_noise = "" text_noise = ""
noise_timestamp = time.time()
# For macos use mlx: # For macos use mlx:
if sys.platform != "darwin": if sys.platform != "darwin":
pipe = load_model() pipe = load_model()
else:
import mlx_whisper


result = mlx_whisper.transcribe(
np.array([]),
path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".",
language=TARGET_LANGUAGE,
)
result = mlx_whisper.transcribe(
np.array([]),
path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".",
language=TARGET_LANGUAGE,
)

node = Node()
noise_timestamp = time.time()
cache_audio = None
for event in node: for event in node:
if event["type"] == "INPUT": if event["type"] == "INPUT":
if "text_noise" in event["id"]: if "text_noise" in event["id"]:
@@ -200,7 +211,12 @@ def main():
) )
noise_timestamp = time.time() noise_timestamp = time.time()
else: else:
audio = event["value"].to_numpy()
audio_input = event["value"].to_numpy()
if cache_audio is not None:
audio = np.concatenate([cache_audio, audio_input])
else:
audio = audio_input

confg = ( confg = (
{"language": TARGET_LANGUAGE, "task": "translate"} {"language": TARGET_LANGUAGE, "task": "translate"}
if TRANSLATE if TRANSLATE
@@ -215,6 +231,7 @@ def main():
audio, audio,
path_or_hf_repo="mlx-community/whisper-large-v3-turbo", path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".", append_punctuations=".",
language=TARGET_LANGUAGE,
) )


else: else:
@@ -223,6 +240,8 @@ def main():
generate_kwargs=confg, generate_kwargs=confg,
) )
if result["text"] in BAD_SENTENCES: if result["text"] in BAD_SENTENCES:
print("Discarded text: ", result["text"])
# cache_audio = None
continue continue
text = cut_repetition(result["text"]) text = cut_repetition(result["text"])


@@ -235,6 +254,29 @@ def main():


if text.strip() == "" or text.strip() == ".": if text.strip() == "" or text.strip() == ".":
continue continue
node.send_output(
"text", pa.array([text]), {"language": TARGET_LANGUAGE},
)

if (
text.endswith(".")
or text.endswith("!")
or text.endswith("?")
or text.endswith('."')
or text.endswith('!"')
or text.endswith('?"')
) and not text.endswith("..."):
node.send_output(
"text",
pa.array([text]),
)
node.send_output(
"speech_started",
pa.array([text]),
)
cache_audio = None
audio = None
print("Text:", text)
elif text.endswith("..."):
print(
"Keeping audio in cache for next text output with punctuation"
)
print("Discarded text", text)
cache_audio = audio

+ 20
- 17
node-hub/dora-kokoro-tts/dora_kokoro_tts/main.py View File

@@ -1,4 +1,5 @@
"""TODO: Add docstring.""" """TODO: Add docstring."""

import os import os
import re import re


@@ -8,11 +9,12 @@ from kokoro import KPipeline


LANGUAGE = os.getenv("LANGUAGE", "en") LANGUAGE = os.getenv("LANGUAGE", "en")



def main(): def main():
"""TODO: Add docstring.""" """TODO: Add docstring."""
if LANGUAGE in ["en", "english"]: if LANGUAGE in ["en", "english"]:
pipeline = KPipeline(lang_code="a") pipeline = KPipeline(lang_code="a")
elif LANGUAGE in ["zh","ch","chinese"]:
elif LANGUAGE in ["zh", "ch", "chinese"]:
pipeline = KPipeline(lang_code="z") pipeline = KPipeline(lang_code="z")
else: else:
print("warning: Defaulting to english speaker as language not found") print("warning: Defaulting to english speaker as language not found")
@@ -22,22 +24,23 @@ def main():


for event in node: for event in node:
if event["type"] == "INPUT": if event["type"] == "INPUT":
if event["id"] == "text":
text = event["value"][0].as_py()
if re.findall(r'[\u4e00-\u9fff]+', text):
pipeline = KPipeline(lang_code="z")
elif pipeline.lang_code != "a":
pipeline = KPipeline(lang_code="a") # <= make sure lang_code matches voice

generator = pipeline(
text,
voice="af_heart", # <= change voice here
speed=1.2,
split_pattern=r"\n+",
)
for _, (_, _, audio) in enumerate(generator):
audio = audio.numpy()
node.send_output("audio", pa.array(audio), {"sample_rate": 24000})
text = event["value"][0].as_py()
if re.findall(r"[\u4e00-\u9fff]+", text):
pipeline = KPipeline(lang_code="z")
elif pipeline.lang_code != "a":
pipeline = KPipeline(
lang_code="a"
) # <= make sure lang_code matches voice

generator = pipeline(
text,
voice="af_heart", # <= change voice here
speed=1.2,
split_pattern=r"\n+",
)
for _, (_, _, audio) in enumerate(generator):
audio = audio.numpy()
node.send_output("audio", pa.array(audio), {"sample_rate": 24000})




if __name__ == "__main__": if __name__ == "__main__":


+ 5
- 1
node-hub/dora-kokoro-tts/pyproject.toml View File

@@ -5,13 +5,14 @@ authors = [{ name = "Your Name", email = "email@email.com" }]
description = "dora-kokoro-tts" description = "dora-kokoro-tts"
license = { text = "MIT" } license = { text = "MIT" }
readme = "README.md" readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"


dependencies = [ dependencies = [
"dora-rs >= 0.3.9", "dora-rs >= 0.3.9",
"kokoro>=0.2.2", "kokoro>=0.2.2",
"soundfile>=0.13.1", "soundfile>=0.13.1",
"misaki[zh]", "misaki[zh]",
"en-core-web-sm",
] ]


[dependency-groups] [dependency-groups]
@@ -31,3 +32,6 @@ extend-select = [
"N", # Ruff's N rule "N", # Ruff's N rule
"I", # Ruff's I rule "I", # Ruff's I rule
] ]

[tool.uv.sources]
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }

+ 31
- 0
node-hub/dora-openai-websocket/Cargo.toml View File

@@ -0,0 +1,31 @@
[package]
name = "dora-openai-websocket"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dora-node-api = { workspace = true }
dora-cli = { path = "../../binaries/cli", default-features = false }
tokio = { version = "1.25.0", features = ["full", "macros"] }
tokio-rustls = "0.24.0"
rustls-pemfile = "1.0"
hyper-util = { version = "0.1.0", features = ["tokio"] }
http-body-util = { version = "0.1.0" }
hyper = { version = "1", features = ["http1", "server", "client"] }
assert2 = "0.3.4"
trybuild = "1.0.106"
criterion = "0.4.0"
anyhow = "1.0.71"
webpki-roots = "0.23.0"
bytes = "1.4.0"
axum = "0.8.1"
fastwebsockets = { version = "0.10.0", features = ["upgrade"] }
serde_json = "1.0.141"
serde = "1.0.219"
base = "0.1.0"
base64 = "0.22.1"
rand = "0.9.2"
futures-util = "0.3.31"
futures-concurrency = "7.6.3"

+ 513
- 0
node-hub/dora-openai-websocket/src/main.rs View File

@@ -0,0 +1,513 @@
// Copyright 2023 Divy Srivastava <dj.srivastava23@gmail.com>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use base64::engine::general_purpose;
use base64::Engine;
use dora_cli::command::Executable;
use dora_cli::command::Start;
use dora_node_api::arrow::array::AsArray;
use dora_node_api::arrow::datatypes::DataType;
use dora_node_api::dora_core::config::DataId;
use dora_node_api::dora_core::config::NodeId;
use dora_node_api::dora_core::topics::DORA_COORDINATOR_PORT_CONTROL_DEFAULT;
use dora_node_api::into_vec;
use dora_node_api::DoraNode;
use dora_node_api::IntoArrow;
use dora_node_api::MetadataParameters;
use fastwebsockets::upgrade;
use fastwebsockets::Frame;
use fastwebsockets::OpCode;
use fastwebsockets::Payload;
use fastwebsockets::WebSocketError;
use futures_concurrency::future::Race;
use futures_util::future;
use futures_util::future::Either;
use futures_util::FutureExt;
use http_body_util::Empty;
use hyper::body::Bytes;
use hyper::body::Incoming;
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper::Request;
use hyper::Response;
use rand::random;
use serde;
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::fs;
use std::io::{self, Write};
use std::net::IpAddr;
use std::net::Ipv4Addr;
use tokio::net::TcpListener;
#[derive(Serialize, Deserialize, Debug)]
pub struct ErrorDetails {
pub code: Option<String>,
pub message: String,
pub param: Option<String>,
#[serde(rename = "type")]
pub error_type: Option<String>,
}

/// Client-to-server messages of the OpenAI Realtime WebSocket protocol,
/// discriminated by the JSON `type` field (internally tagged via serde).
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
pub enum OpenAIRealtimeMessage {
    /// Update the session configuration; `handle_client` requires this as the
    /// very first frame of a connection.
    #[serde(rename = "session.update")]
    SessionUpdate { session: SessionConfig },
    /// Append a chunk of input audio to the server-side buffer.
    #[serde(rename = "input_audio_buffer.append")]
    InputAudioBufferAppend {
        audio: String, // base64 encoded audio
    },
    /// Commit the buffered input audio as a completed user turn.
    #[serde(rename = "input_audio_buffer.commit")]
    InputAudioBufferCommit,
    /// Request that the server generate a model response.
    #[serde(rename = "response.create")]
    ResponseCreate { response: ResponseConfig },
    /// Insert an item into the conversation history.
    #[serde(rename = "conversation.item.create")]
    ConversationItemCreate { item: ConversationItem },
    /// Truncate a previously created audio item at a millisecond offset.
    #[serde(rename = "conversation.item.truncate")]
    ConversationItemTruncate {
        item_id: String,
        content_index: u32,
        audio_end_ms: u32,
        #[serde(skip_serializing_if = "Option::is_none")]
        event_id: Option<String>,
    },
}

#[derive(Serialize, Deserialize, Debug)]
pub struct SessionConfig {
pub modalities: Vec<String>,
pub instructions: String,
pub voice: String,
pub model: String,
pub input_audio_format: String,
pub output_audio_format: String,
pub input_audio_transcription: Option<TranscriptionConfig>,
pub turn_detection: Option<TurnDetectionConfig>,
pub tools: Vec<serde_json::Value>,
pub tool_choice: String,
pub temperature: f32,
pub max_response_output_tokens: Option<u32>,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct TranscriptionConfig {
pub model: String,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct TurnDetectionConfig {
#[serde(rename = "type")]
pub detection_type: String,
pub threshold: f32,
pub prefix_padding_ms: u32,
pub silence_duration_ms: u32,
pub interrupt_response: bool,
pub create_response: bool,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct ResponseConfig {
pub modalities: Vec<String>,
pub instructions: Option<String>,
pub voice: Option<String>,
pub output_audio_format: Option<String>,
pub tools: Option<Vec<serde_json::Value>>,
pub tool_choice: Option<String>,
pub temperature: Option<f32>,
pub max_output_tokens: Option<u32>,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct ConversationItem {
pub id: Option<String>,
#[serde(rename = "type")]
pub item_type: String,
pub status: Option<String>,
pub role: String,
pub content: Vec<ContentPart>,
}

#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
pub enum ContentPart {
#[serde(rename = "input_text")]
InputText { text: String },
#[serde(rename = "input_audio")]
InputAudio {
audio: String,
transcript: Option<String>,
},
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "audio")]
Audio {
audio: String,
transcript: Option<String>,
},
}

/// Server-to-client messages of the OpenAI Realtime protocol, discriminated
/// by the JSON `type` field. In this bridge they are *emitted* by
/// `handle_client` toward the WebSocket client.
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
pub enum OpenAIRealtimeResponse {
    #[serde(rename = "error")]
    Error { error: ErrorDetails },
    /// Sent once after the dataflow has been started for a new session.
    #[serde(rename = "session.created")]
    SessionCreated { session: serde_json::Value },
    #[serde(rename = "session.updated")]
    SessionUpdated { session: serde_json::Value },
    #[serde(rename = "conversation.item.created")]
    ConversationItemCreated { item: serde_json::Value },
    #[serde(rename = "conversation.item.truncated")]
    ConversationItemTruncated { item: serde_json::Value },
    /// Incremental chunk of synthesized audio.
    #[serde(rename = "response.audio.delta")]
    ResponseAudioDelta {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
        delta: String, // base64 encoded audio
    },
    #[serde(rename = "response.audio.done")]
    ResponseAudioDone {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
    },
    /// Incremental chunk of generated text.
    #[serde(rename = "response.text.delta")]
    ResponseTextDelta {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
        delta: String,
    },
    /// Incremental chunk of the audio transcript.
    #[serde(rename = "response.audio_transcript.delta")]
    ResponseAudioTranscriptDelta {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
        delta: String,
    },
    #[serde(rename = "response.done")]
    ResponseDone { response: serde_json::Value },
    #[serde(rename = "input_audio_buffer.speech_started")]
    InputAudioBufferSpeechStarted {
        audio_start_ms: u32,
        item_id: String,
    },
    #[serde(rename = "input_audio_buffer.speech_stopped")]
    InputAudioBufferSpeechStopped { audio_end_ms: u32, item_id: String },
    /// Catch-all so unknown message types deserialize instead of erroring.
    #[serde(other)]
    Other,
}

/// Decode little-endian 16-bit PCM bytes into normalized `f32` samples.
///
/// Each byte pair is read as an `i16` and scaled by 1/32767, mirroring the
/// encoding in `convert_f32_to_pcm16`. A trailing odd byte, if any, is
/// ignored (`chunks_exact`).
fn convert_pcm16_to_f32(bytes: &[u8]) -> Vec<f32> {
    bytes
        .chunks_exact(2)
        .map(|pair| i16::from_le_bytes([pair[0], pair[1]]) as f32 / 32767.0)
        .collect()
}

/// Encode `f32` samples as little-endian 16-bit PCM bytes.
///
/// Each sample is limited to [-1.0, 1.0] via `.max(-1.0).min(1.0)` (this
/// order also maps NaN to -1.0) and scaled by 32767 before the `i16` cast,
/// so out-of-range input cannot wrap around.
fn convert_f32_to_pcm16(samples: &[f32]) -> Vec<u8> {
    let mut out = Vec::with_capacity(samples.len() * 2);
    for &sample in samples {
        let quantized = (sample.max(-1.0).min(1.0) * 32767.0) as i16;
        out.extend_from_slice(&quantized.to_le_bytes());
    }
    out
}

/// Replaces every placeholder key found in a file and writes the result to
/// an output file.
///
/// # Arguments
///
/// * `input_path` - Path to the input (template) file containing placeholder text.
/// * `replacement` - Map of placeholder text -> replacement text; each key is
///   replaced everywhere it occurs via plain substring substitution.
/// * `output_path` - Path to write the modified content.
fn replace_placeholder_in_file(
    input_path: &str,
    replacement: &HashMap<String, String>,
    output_path: &str,
) -> io::Result<()> {
    // Read the file content into a string
    let mut content = fs::read_to_string(input_path)?;

    // Apply each placeholder -> replacement pair in turn. Iteration order of
    // a HashMap is unspecified, so keys must not overlap each other's values.
    for (placeholder, replacement) in replacement {
        content = content.replace(placeholder, replacement);
    }

    // Write the modified content to the output file
    let mut file = fs::File::create(output_path)?;
    file.write_all(content.as_bytes())?;

    Ok(())
}

/// Drive one WebSocket session: spawn a per-client dora dataflow and bridge
/// frames between the socket and the dataflow node.
///
/// Protocol (a subset of OpenAI Realtime):
/// 1. The first frame must be a text `session.update`; its transcription
///    model and LLM select and fill `<stt>-template-metal.yml`.
/// 2. The generated dataflow is started through the dora CLI and this
///    process joins it as the dynamic node `server-<random u16>`.
/// 3. Dataflow events and client frames are then raced in a loop: node
///    text/audio outputs become `response.*` deltas, and client
///    `input_audio_buffer.append` audio is forwarded into the dataflow.
///
/// NOTE(review): handshake/JSON failures `unwrap()` and poison this task
/// only (it runs detached in `server_upgrade`); the created `<stt>-<id>.yml`
/// file is never cleaned up.
async fn handle_client(fut: upgrade::UpgradeFut) -> Result<(), WebSocketError> {
    let mut ws = fastwebsockets::FragmentCollector::new(fut.await?);

    // The opening frame must be text JSON carrying a `session.update`;
    // anything else aborts the connection.
    let frame = ws.read_frame().await?;
    if frame.opcode != OpCode::Text {
        return Err(WebSocketError::InvalidConnectionHeader);
    }
    let data: OpenAIRealtimeMessage = serde_json::from_slice(&frame.payload).unwrap();
    let OpenAIRealtimeMessage::SessionUpdate { session } = data else {
        return Err(WebSocketError::InvalidConnectionHeader);
    };

    // Default STT template name when the client requests no transcription model.
    let input_audio_transcription = session
        .input_audio_transcription
        .map_or("moyoyo-whisper".to_string(), |t| t.model);
    let llm = session.model.clone();
    // Random per-connection id keeps concurrent sessions' dataflows distinct.
    let id = random::<u16>();
    let node_id = format!("server-{id}");
    let dataflow = format!("{input_audio_transcription}-{}.yml", id);
    let template = format!("{input_audio_transcription}-template-metal.yml");
    let mut replacements = HashMap::new();
    replacements.insert("NODE_ID".to_string(), node_id.clone());
    replacements.insert("LLM_ID".to_string(), llm);
    println!("Filling template: {}", template);
    replace_placeholder_in_file(&template, &replacements, &dataflow).unwrap();
    // Start the generated dataflow detached on the local coordinator; this
    // process later attaches to it as the dynamic node `node_id`.
    dora_cli::command::Command::Start(Start {
        dataflow,
        name: Some(node_id.to_string()),
        coordinator_addr: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
        coordinator_port: DORA_COORDINATOR_PORT_CONTROL_DEFAULT,
        attach: false,
        detach: true,
        hot_reload: false,
        uv: true,
    })
    .execute()
    .unwrap();
    let (mut node, mut events) =
        DoraNode::init_from_node_id(NodeId::from(node_id.clone())).unwrap();
    // Acknowledge the session so the client starts streaming audio.
    let serialized_data = OpenAIRealtimeResponse::SessionCreated {
        session: serde_json::Value::Null,
    };

    let payload =
        Payload::Bytes(Bytes::from(serde_json::to_string(&serialized_data).unwrap()).into());
    let frame = Frame::text(payload);
    ws.write_frame(frame).await?;
    loop {
        // Race the next dataflow event against the next client frame; only
        // one side is consumed per iteration.
        let event_fut = events.recv_async().map(Either::Left);
        let frame_fut = ws.read_frame().map(Either::Right);
        let event_stream = (event_fut, frame_fut).race();
        // `finished` marks outputs that should be followed by `response.done`.
        let mut finished = false;
        let frame = match event_stream.await {
            future::Either::Left(Some(ev)) => {
                let frame = match ev {
                    dora_node_api::Event::Input {
                        id,
                        metadata: _,
                        data,
                    } => {
                        if data.data_type() == &DataType::Utf8 {
                            // String output -> transcript delta. The "123"
                            // ids/indices are fixed placeholders, not real
                            // response tracking.
                            let data = data.as_string::<i32>();
                            let str = data.value(0);
                            let serialized_data =
                                OpenAIRealtimeResponse::ResponseAudioTranscriptDelta {
                                    response_id: "123".to_string(),
                                    item_id: "123".to_string(),
                                    output_index: 123,
                                    content_index: 123,
                                    delta: str.to_string(),
                                };

                            let frame = Frame::text(Payload::Bytes(
                                Bytes::from(serde_json::to_string(&serialized_data).unwrap())
                                    .into(),
                            ));
                            frame
                        } else if id.contains("audio") {
                            // Audio output -> PCM16 -> base64 audio delta.
                            let data: Vec<f32> = into_vec(&data).unwrap();
                            let data = convert_f32_to_pcm16(&data);
                            let serialized_data = OpenAIRealtimeResponse::ResponseAudioDelta {
                                response_id: "123".to_string(),
                                item_id: "123".to_string(),
                                output_index: 123,
                                content_index: 123,
                                delta: general_purpose::STANDARD.encode(data),
                            };
                            finished = true;

                            let frame = Frame::text(Payload::Bytes(
                                Bytes::from(serde_json::to_string(&serialized_data).unwrap())
                                    .into(),
                            ));
                            frame
                        } else if id.contains("") {
                            // BUG?: `id.contains("")` is always true, so this
                            // arm fires for every remaining input and the
                            // `unimplemented!()` below is unreachable —
                            // presumably meant `id.contains("speech_started")`;
                            // TODO confirm against the dataflow outputs.
                            let serialized_data =
                                OpenAIRealtimeResponse::InputAudioBufferSpeechStarted {
                                    audio_start_ms: 123,
                                    item_id: "123".to_string(),
                                };
                            finished = true;

                            let frame = Frame::text(Payload::Bytes(
                                Bytes::from(serde_json::to_string(&serialized_data).unwrap())
                                    .into(),
                            ));
                            frame
                        } else {
                            unimplemented!()
                        }
                    }
                    dora_node_api::Event::Error(_) => {
                        // Dataflow errors are ignored; keep serving the socket.
                        continue;
                    }
                    _ => break,
                };
                Some(frame)
            }
            // Event channel closed: the dataflow is gone, end the session.
            future::Either::Left(None) => break,
            future::Either::Right(Ok(frame)) => {
                match frame.opcode {
                    OpCode::Close => break,
                    OpCode::Text | OpCode::Binary => {
                        let data: OpenAIRealtimeMessage =
                            serde_json::from_slice(&frame.payload).unwrap();

                        match data {
                            OpenAIRealtimeMessage::InputAudioBufferAppend { audio } => {
                                let f32_data = audio;
                                // Decode base64 encoded audio data
                                let f32_data = f32_data.trim();
                                if f32_data.is_empty() {
                                    continue;
                                }

                                if let Ok(f32_data) = general_purpose::STANDARD.decode(f32_data) {
                                    let f32_data = convert_pcm16_to_f32(&f32_data);
                                    // Downsample 24 kHz -> 16 kHz by dropping
                                    // every third sample (keeps 2/3). This is
                                    // naive decimation without an anti-alias
                                    // low-pass filter.
                                    let f32_data = f32_data
                                        .into_iter()
                                        .enumerate()
                                        .filter(|(i, _)| i % 3 != 0)
                                        .map(|(_, v)| v)
                                        .collect::<Vec<f32>>();

                                    let mut parameter = MetadataParameters::default();
                                    parameter.insert(
                                        "sample_rate".to_string(),
                                        dora_node_api::Parameter::Integer(16000),
                                    );
                                    node.send_output(
                                        DataId::from("audio".to_string()),
                                        parameter,
                                        f32_data.into_arrow(),
                                    )
                                    .unwrap();
                                }
                            }
                            // NOTE(review): a commit ends the whole session
                            // rather than just the audio turn — verify this is
                            // intended.
                            OpenAIRealtimeMessage::InputAudioBufferCommit => break,
                            OpenAIRealtimeMessage::ResponseCreate { response } => {
                                // Forward the instructions as a text input to
                                // the dataflow (used for the initial greeting).
                                if let Some(text) = response.instructions {
                                    node.send_output(
                                        DataId::from("text".to_string()),
                                        Default::default(),
                                        text.into_arrow(),
                                    )
                                    .unwrap();
                                }
                            }
                            _ => {}
                        }
                    }
                    _ => break,
                }
                None
            }
            future::Either::Right(Err(_)) => break,
        };
        if let Some(frame) = frame {
            ws.write_frame(frame).await?;
        }
        // After each audio / speech-started delta, emit `response.done` so
        // the client treats the turn as complete.
        if finished {
            let serialized_data = OpenAIRealtimeResponse::ResponseDone {
                response: serde_json::Value::Null,
            };

            let payload = Payload::Bytes(
                Bytes::from(serde_json::to_string(&serialized_data).unwrap()).into(),
            );
            println!("Sending response done: {:?}", serialized_data);
            let frame = Frame::text(payload);
            ws.write_frame(frame).await?;
        };
    }

    Ok(())
}
/// HTTP handler that upgrades a request to a WebSocket.
///
/// The handshake response is returned to hyper immediately, while the actual
/// session runs in a detached background task so one slow client cannot
/// block the accept loop.
async fn server_upgrade(
    mut req: Request<Incoming>,
) -> Result<Response<Empty<Bytes>>, WebSocketError> {
    let (response, upgrade_fut) = upgrade::upgrade(&mut req)?;

    tokio::task::spawn(async move {
        let session = tokio::task::unconstrained(handle_client(upgrade_fut));
        if let Err(e) = session.await {
            eprintln!("Error in websocket connection: {}", e);
        }
    });

    Ok(response)
}

/// Entry point: serve the OpenAI-Realtime WebSocket bridge on 127.0.0.1:8123.
///
/// Builds a multi-threaded tokio runtime by hand (instead of `#[tokio::main]`)
/// and accepts connections forever; each connection is served by hyper with
/// upgrade support so `server_upgrade` can switch it to a WebSocket.
fn main() -> Result<(), WebSocketError> {
    let addr = "127.0.0.1:8123";

    let rt = tokio::runtime::Builder::new_multi_thread()
        .enable_io()
        .enable_time()
        .build()
        .unwrap();

    rt.block_on(async move {
        let listener = TcpListener::bind(addr).await?;
        println!("Server started, listening on {}", addr);
        loop {
            let (stream, _) = listener.accept().await?;
            println!("Client connected");
            tokio::spawn(async move {
                let io = hyper_util::rt::TokioIo::new(stream);
                let serve = http1::Builder::new()
                    .serve_connection(io, service_fn(server_upgrade))
                    .with_upgrades();
                if let Err(e) = serve.await {
                    println!("An error occurred: {:?}", e);
                }
            });
        }
    })
}

+ 21
- 27
node-hub/dora-qwen/dora_qwen/main.py View File

@@ -1,7 +1,6 @@
"""TODO: Add docstring.""" """TODO: Add docstring."""


import os import os
import sys


import pyarrow as pa import pyarrow as pa
from dora import Node from dora import Node
@@ -12,14 +11,24 @@ SYSTEM_PROMPT = os.getenv(
"You're a very succinct AI assistant with short answers.", "You're a very succinct AI assistant with short answers.",
) )


MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-0.5B-Instruct-GGUF")
MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*fp16.gguf")
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
N_THREADS = int(os.getenv("N_THREADS", "4"))
CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))



def get_model_gguf(): def get_model_gguf():
"""TODO: Add docstring.""" """TODO: Add docstring."""
from llama_cpp import Llama from llama_cpp import Llama


return Llama.from_pretrained( return Llama.from_pretrained(
repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
filename="*fp16.gguf",
repo_id=MODEL_NAME_OR_PATH,
filename=MODEL_FILE_PATTERN,
n_gpu_layers=N_GPU_LAYERS,
n_ctx=CONTEXT_SIZE,
n_threads=N_THREADS,
verbose=False, verbose=False,
) )


@@ -71,12 +80,7 @@ def main():
"""TODO: Add docstring.""" """TODO: Add docstring."""
history = [] history = []
# If OS is not Darwin, use Huggingface model # If OS is not Darwin, use Huggingface model
if sys.platform == "darwin":
model = get_model_gguf()
elif sys.platform == "linux":
model, tokenizer = get_model_huggingface()
else:
model, tokenizer = get_model_darwin()
model = get_model_gguf()


node = Node() node = Node()


@@ -89,24 +93,14 @@ def main():
if len(ACTIVATION_WORDS) == 0 or any( if len(ACTIVATION_WORDS) == 0 or any(
word in ACTIVATION_WORDS for word in words word in ACTIVATION_WORDS for word in words
): ):
# On linux, Windows
if sys.platform == "darwin":
response = model.create_chat_completion(
messages=[{"role": "user", "content": text}], # Prompt
max_tokens=24,
)["choices"][0]["message"]["content"]
elif sys.platform == "linux":
response, history = generate_hf(model, tokenizer, text, history)
else:
from mlx_lm import generate

response = generate(
model,
tokenizer,
prompt=text,
verbose=False,
max_tokens=50,
)
history += [{"role": "user", "content": text}]

response = model.create_chat_completion(
messages=history, # Prompt
max_tokens=24,
)["choices"][0]["message"]["content"]

history += [{"role": "assistant", "content": response}]


node.send_output( node.send_output(
output_id="text", output_id="text",


+ 15
- 4
node-hub/dora-vad/dora_vad/main.py View File

@@ -36,21 +36,32 @@ def main():
threshold=THRESHOLD, threshold=THRESHOLD,
min_speech_duration_ms=MIN_SPEECH_DURATION_MS, min_speech_duration_ms=MIN_SPEECH_DURATION_MS,
min_silence_duration_ms=MIN_SILENCE_DURATION_MS, min_silence_duration_ms=MIN_SILENCE_DURATION_MS,
sampling_rate=sr,
) )

if len(speech_timestamps) == 0:
# If there is no speech, return the audio
continue
arg_max = np.argmax([ts["end"] - ts["start"] for ts in speech_timestamps])
# Check ig there is timestamp # Check ig there is timestamp
if ( if (
len(speech_timestamps) > 0 len(speech_timestamps) > 0
and len(audio) > MIN_AUDIO_SAMPLING_DURATION_MS * sr / 1000
and len(
audio[speech_timestamps[0]["start"] : speech_timestamps[-1]["end"]]
)
> MIN_AUDIO_SAMPLING_DURATION_MS * sr / 1000
and (
(len(audio) - speech_timestamps[arg_max]["end"])
> MIN_SILENCE_DURATION_MS / 1000 * sr * 5
)
): ):
# Check if the audio is not cut at the end. And only return if there is a long time spent # Check if the audio is not cut at the end. And only return if there is a long time spent
if speech_timestamps[-1]["end"] == len(audio): if speech_timestamps[-1]["end"] == len(audio):
node.send_output( node.send_output(
"timestamp_start", "timestamp_start",
pa.array([speech_timestamps[-1]["start"]]), pa.array([speech_timestamps[-1]["start"]]),
metadata={"sample_rate": sr},
) )
continue
audio = audio[0 : speech_timestamps[-1]["end"]]
audio = audio[: speech_timestamps[-1]["end"]]
node.send_output("audio", pa.array(audio), metadata={"sample_rate": sr}) node.send_output("audio", pa.array(audio), metadata={"sample_rate": sr})
last_audios = [audio[speech_timestamps[-1]["end"] :]] last_audios = [audio[speech_timestamps[-1]["end"] :]]




Loading…
Cancel
Save