Compare commits

...

17 changed files with 1277 additions and 143 deletions
Split View
  1. +434
    -34
      Cargo.lock
  2. +1
    -0
      Cargo.toml
  3. +1
    -0
      binaries/cli/src/command/coordinator.rs
  4. +1
    -0
      binaries/cli/src/command/daemon.rs
  5. +16
    -16
      binaries/cli/src/command/mod.rs
  6. +1
    -0
      binaries/cli/src/command/run.rs
  7. +8
    -8
      binaries/cli/src/command/start/mod.rs
  8. +1
    -1
      binaries/cli/src/lib.rs
  9. +78
    -0
      examples/openai-realtime/README.md
  10. +54
    -0
      examples/openai-realtime/whisper-template-metal.yml
  11. +77
    -35
      node-hub/dora-distil-whisper/dora_distil_whisper/main.py
  12. +20
    -17
      node-hub/dora-kokoro-tts/dora_kokoro_tts/main.py
  13. +5
    -1
      node-hub/dora-kokoro-tts/pyproject.toml
  14. +31
    -0
      node-hub/dora-openai-websocket/Cargo.toml
  15. +513
    -0
      node-hub/dora-openai-websocket/src/main.rs
  16. +21
    -27
      node-hub/dora-qwen/dora_qwen/main.py
  17. +15
    -4
      node-hub/dora-vad/dora_vad/main.py

+ 434
- 34
Cargo.lock View File

@@ -264,6 +264,12 @@ dependencies = [
"libc",
]

[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"

[[package]]
name = "ansi_colours"
version = "1.2.3"
@@ -666,7 +672,7 @@ dependencies = [
"enumflags2",
"futures-channel",
"futures-util",
"rand 0.9.1",
"rand 0.9.2",
"raw-window-handle 0.6.2",
"serde",
"serde_repr",
@@ -713,6 +719,29 @@ dependencies = [
"syn 2.0.101",
]

[[package]]
name = "assert2"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d6c710e60d14b07d8f42d0e702b16120865eea39edb751e75cd6bf401d18f14"
dependencies = [
"assert2-macros",
"diff",
"yansi",
]

[[package]]
name = "assert2-macros"
version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9008cbbba9e1d655538870b91fd93814bd82e6968f27788fc734375120ac6f57"
dependencies = [
"proc-macro2",
"quote",
"rustc_version",
"syn 2.0.101",
]

[[package]]
name = "assert_matches"
version = "1.5.0"
@@ -1179,14 +1208,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core",
"axum-core 0.4.5",
"bytes",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"itoa",
"matchit",
"matchit 0.7.3",
"memchr",
"mime",
"percent-encoding",
@@ -1199,6 +1228,40 @@ dependencies = [
"tower-service",
]

[[package]]
name = "axum"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
dependencies = [
"axum-core 0.5.2",
"bytes",
"form_urlencoded",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"hyper 1.6.0",
"hyper-util",
"itoa",
"matchit 0.8.4",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]

[[package]]
name = "axum-core"
version = "0.4.5"
@@ -1219,6 +1282,26 @@ dependencies = [
"tower-service",
]

[[package]]
name = "axum-core"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
dependencies = [
"bytes",
"futures-core",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]

[[package]]
name = "az"
version = "1.2.1"
@@ -1240,6 +1323,12 @@ dependencies = [
"windows-targets 0.52.6",
]

[[package]]
name = "base"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a956d500c2380c818e09d3d7c79ba4a1d7fc6354464f1fceaa5705483a29930"

[[package]]
name = "base64"
version = "0.13.1"
@@ -1640,7 +1729,7 @@ dependencies = [
"metal 0.27.0",
"num-traits",
"num_cpus",
"rand 0.9.1",
"rand 0.9.2",
"rand_distr",
"rayon",
"safetensors",
@@ -1732,6 +1821,12 @@ dependencies = [
"thiserror 1.0.69",
]

[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

[[package]]
name = "cc"
version = "1.2.17"
@@ -1867,6 +1962,33 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901"

[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]

[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"

[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]

[[package]]
name = "cipher"
version = "0.4.4"
@@ -2303,6 +2425,42 @@ dependencies = [
"cfg-if 1.0.0",
]

[[package]]
name = "criterion"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb"
dependencies = [
"anes",
"atty",
"cast",
"ciborium",
"clap 3.2.25",
"criterion-plot",
"itertools 0.10.5",
"lazy_static",
"num-traits",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]

[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]

[[package]]
name = "crossbeam"
version = "0.8.4"
@@ -2809,6 +2967,12 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"

[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"

[[package]]
name = "digest"
version = "0.10.7"
@@ -3266,6 +3430,35 @@ dependencies = [
"uuid 1.16.0",
]

[[package]]
name = "dora-openai-websocket"
version = "0.1.0"
dependencies = [
"anyhow",
"assert2",
"axum 0.8.4",
"base",
"base64 0.22.1",
"bytes",
"criterion",
"dora-cli",
"dora-node-api",
"fastwebsockets",
"futures-concurrency",
"futures-util",
"http-body-util",
"hyper 1.6.0",
"hyper-util",
"rand 0.9.2",
"rustls-pemfile 1.0.4",
"serde",
"serde_json",
"tokio",
"tokio-rustls 0.24.1",
"trybuild",
"webpki-roots 0.23.1",
]

[[package]]
name = "dora-operator-api"
version = "0.3.12"
@@ -3362,7 +3555,7 @@ dependencies = [
"ndarray 0.15.6",
"pinyin",
"pyo3",
"rand 0.9.1",
"rand 0.9.2",
"rerun",
"tokio",
]
@@ -4161,6 +4354,26 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"

[[package]]
name = "fastwebsockets"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "305d3ba574508e27190906d11707dad683e0494e6b85eae9b044cb2734a5e422"
dependencies = [
"base64 0.21.7",
"bytes",
"http-body-util",
"hyper 1.6.0",
"hyper-util",
"pin-project",
"rand 0.8.5",
"sha1",
"simdutf8",
"thiserror 1.0.69",
"tokio",
"utf-8",
]

[[package]]
name = "fdeflate"
version = "0.3.7"
@@ -4272,7 +4485,7 @@ dependencies = [
"cudarc",
"half",
"num-traits",
"rand 0.9.1",
"rand 0.9.2",
"rand_distr",
]

@@ -5052,7 +5265,7 @@ dependencies = [
"cfg-if 1.0.0",
"crunchy",
"num-traits",
"rand 0.9.1",
"rand 0.9.2",
"rand_distr",
]

@@ -5392,7 +5605,7 @@ dependencies = [
"rustls 0.23.25",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tower-service",
"webpki-roots 0.26.8",
]
@@ -6575,6 +6788,12 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"

[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"

[[package]]
name = "matrixmultiply"
version = "0.3.9"
@@ -6858,7 +7077,7 @@ dependencies = [
"image",
"indexmap 2.8.0",
"mistralrs-core",
"rand 0.9.1",
"rand 0.9.2",
"reqwest",
"serde",
"serde_json",
@@ -6910,7 +7129,7 @@ dependencies = [
"objc",
"once_cell",
"radix_trie",
"rand 0.9.1",
"rand 0.9.2",
"rand_isaac",
"rayon",
"regex",
@@ -6931,7 +7150,7 @@ dependencies = [
"tokio",
"tokio-rayon",
"toktrie_hf_tokenizers",
"toml",
"toml 0.8.20",
"tqdm",
"tracing",
"tracing-subscriber",
@@ -7893,6 +8112,12 @@ dependencies = [
"pkg-config",
]

[[package]]
name = "oorandom"
version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"

[[package]]
name = "openssl-probe"
version = "0.1.6"
@@ -8081,7 +8306,7 @@ dependencies = [
"glob",
"opentelemetry 0.29.1",
"percent-encoding",
"rand 0.9.1",
"rand 0.9.2",
"serde_json",
"thiserror 2.0.12",
"tokio",
@@ -8531,6 +8756,34 @@ dependencies = [
"time",
]

[[package]]
name = "plotters"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]

[[package]]
name = "plotters-backend"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"

[[package]]
name = "plotters-svg"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
dependencies = [
"plotters-backend",
]

[[package]]
name = "ply-rs"
version = "0.1.3"
@@ -9161,7 +9414,7 @@ checksum = "b820744eb4dc9b57a3398183639c511b5a26d2ed702cedd3febaa1393caa22cc"
dependencies = [
"bytes",
"getrandom 0.3.2",
"rand 0.9.1",
"rand 0.9.2",
"ring 0.17.14",
"rustc-hash 2.1.1",
"rustls 0.23.25",
@@ -9261,9 +9514,9 @@ dependencies = [

[[package]]
name = "rand"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
dependencies = [
"rand_chacha 0.9.0",
"rand_core 0.9.3",
@@ -9314,7 +9567,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463"
dependencies = [
"num-traits",
"rand 0.9.1",
"rand 0.9.2",
]

[[package]]
@@ -9370,7 +9623,7 @@ dependencies = [
"simd_helpers",
"system-deps",
"thiserror 1.0.69",
"toml",
"toml 0.8.20",
"v_frame",
"y4m",
]
@@ -10508,7 +10761,7 @@ dependencies = [
"serde",
"syn 2.0.101",
"tempfile",
"toml",
"toml 0.8.20",
"unindent",
"xshell",
]
@@ -11260,7 +11513,7 @@ dependencies = [
"sync_wrapper",
"system-configuration",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tokio-util",
"tower 0.5.2",
"tower-service",
@@ -11660,7 +11913,7 @@ dependencies = [
"num-derive",
"num-traits",
"paste",
"rand 0.9.1",
"rand 0.9.2",
"serde",
"serde_repr",
"socket2 0.5.8",
@@ -11731,6 +11984,18 @@ dependencies = [
"webpki",
]

[[package]]
name = "rustls"
version = "0.21.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
dependencies = [
"log",
"ring 0.17.14",
"rustls-webpki 0.101.7",
"sct",
]

[[package]]
name = "rustls"
version = "0.23.25"
@@ -11824,6 +12089,26 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"

[[package]]
name = "rustls-webpki"
version = "0.100.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f6a5fc258f1c1276dfe3016516945546e2d5383911efc0fc4f1cdc5df3a4ae3"
dependencies = [
"ring 0.16.20",
"untrusted 0.7.1",
]

[[package]]
name = "rustls-webpki"
version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring 0.17.14",
"untrusted 0.9.0",
]

[[package]]
name = "rustls-webpki"
version = "0.102.8"
@@ -12228,9 +12513,9 @@ dependencies = [

[[package]]
name = "serde_json"
version = "1.0.140"
version = "1.0.141"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
dependencies = [
"indexmap 2.8.0",
"itoa",
@@ -12239,6 +12524,16 @@ dependencies = [
"serde",
]

[[package]]
name = "serde_path_to_error"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a"
dependencies = [
"itoa",
"serde",
]

[[package]]
name = "serde_plain"
version = "1.0.2"
@@ -12268,6 +12563,15 @@ dependencies = [
"serde",
]

[[package]]
name = "serde_spanned"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83"
dependencies = [
"serde",
]

[[package]]
name = "serde_urlencoded"
version = "0.7.1"
@@ -13221,7 +13525,7 @@ dependencies = [
"cfg-expr",
"heck 0.5.0",
"pkg-config",
"toml",
"toml 0.8.20",
"version-compare",
]

@@ -13257,6 +13561,12 @@ version = "0.12.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"

[[package]]
name = "target-triple"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ac9aa371f599d22256307c24a9d748c041e548cbf599f35d890f9d365361790"

[[package]]
name = "tempfile"
version = "3.19.1"
@@ -13515,6 +13825,16 @@ dependencies = [
"zerovec",
]

[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]

[[package]]
name = "tinyvec"
version = "1.9.0"
@@ -13540,7 +13860,7 @@ dependencies = [
"pin-project-lite",
"thiserror 2.0.12",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
]

[[package]]
@@ -13638,6 +13958,16 @@ dependencies = [
"tokio",
]

[[package]]
name = "tokio-rustls"
version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
dependencies = [
"rustls 0.21.12",
"tokio",
]

[[package]]
name = "tokio-rustls"
version = "0.26.2"
@@ -13720,11 +14050,26 @@ checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
dependencies = [
"indexmap 2.8.0",
"serde",
"serde_spanned",
"toml_datetime",
"serde_spanned 0.6.8",
"toml_datetime 0.6.8",
"toml_edit",
]

[[package]]
name = "toml"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41ae868b5a0f67631c14589f7e250c1ea2c574ee5ba21c6c8dd4b1485705a5a1"
dependencies = [
"indexmap 2.8.0",
"serde",
"serde_spanned 1.0.0",
"toml_datetime 0.7.0",
"toml_parser",
"toml_writer",
"winnow",
]

[[package]]
name = "toml_datetime"
version = "0.6.8"
@@ -13734,6 +14079,15 @@ dependencies = [
"serde",
]

[[package]]
name = "toml_datetime"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3"
dependencies = [
"serde",
]

[[package]]
name = "toml_edit"
version = "0.22.24"
@@ -13742,11 +14096,26 @@ checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
dependencies = [
"indexmap 2.8.0",
"serde",
"serde_spanned",
"toml_datetime",
"serde_spanned 0.6.8",
"toml_datetime 0.6.8",
"winnow",
]

[[package]]
name = "toml_parser"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97200572db069e74c512a14117b296ba0a80a30123fbbb5aa1f4a348f639ca30"
dependencies = [
"winnow",
]

[[package]]
name = "toml_writer"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64"

[[package]]
name = "tonic"
version = "0.12.3"
@@ -13755,7 +14124,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [
"async-stream",
"async-trait",
"axum",
"axum 0.7.9",
"base64 0.22.1",
"bytes",
"h2 0.4.8",
@@ -13772,7 +14141,7 @@ dependencies = [
"rustls-pemfile 2.2.0",
"socket2 0.5.8",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tokio-stream",
"tower 0.4.13",
"tower-layer",
@@ -13858,6 +14227,7 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
"tracing",
]

[[package]]
@@ -14005,6 +14375,21 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"

[[package]]
name = "trybuild"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65af40ad689f2527aebbd37a0a816aea88ff5f774ceabe99de5be02f2f91dae2"
dependencies = [
"glob",
"serde",
"serde_derive",
"serde_json",
"target-triple",
"termcolor",
"toml 0.9.4",
]

[[package]]
name = "ttf-parser"
version = "0.25.1"
@@ -14352,7 +14737,7 @@ checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
dependencies = [
"getrandom 0.3.2",
"js-sys",
"rand 0.9.1",
"rand 0.9.2",
"serde",
"uuid-macro-internal",
"wasm-bindgen",
@@ -14810,6 +15195,15 @@ dependencies = [
"webpki",
]

[[package]]
name = "webpki-roots"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b03058f88386e5ff5310d9111d53f48b17d732b401aeb83a8d5190f2ac459338"
dependencies = [
"rustls-webpki 0.100.3",
]

[[package]]
name = "webpki-roots"
version = "0.26.8"
@@ -15676,9 +16070,9 @@ dependencies = [

[[package]]
name = "winnow"
version = "0.7.4"
version = "0.7.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36"
checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
dependencies = [
"memchr",
]
@@ -15888,6 +16282,12 @@ dependencies = [
"linked-hash-map",
]

[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"

[[package]]
name = "yoke"
version = "0.7.5"
@@ -16549,7 +16949,7 @@ dependencies = [
"time",
"tls-listener",
"tokio",
"tokio-rustls",
"tokio-rustls 0.26.2",
"tokio-util",
"tracing",
"webpki-roots 0.26.8",


+ 1
- 0
Cargo.toml View File

@@ -34,6 +34,7 @@ members = [
"node-hub/dora-rerun",
"node-hub/terminal-print",
"node-hub/openai-proxy-server",
"node-hub/dora-openai-websocket",
"node-hub/dora-kit-car",
"node-hub/dora-object-to-pose",
"node-hub/dora-mistral-rs",


+ 1
- 0
binaries/cli/src/command/coordinator.rs View File

@@ -9,6 +9,7 @@ use dora_tracing::TracingBuilder;
use eyre::Context;
use std::net::{IpAddr, SocketAddr};
use tokio::runtime::Builder;
#[cfg(feature = "tracing")]
use tracing::level_filters::LevelFilter;

#[derive(Debug, clap::Args)]


+ 1
- 0
binaries/cli/src/command/daemon.rs View File

@@ -14,6 +14,7 @@ use std::{
path::PathBuf,
};
use tokio::runtime::Builder;
#[cfg(feature = "tracing")]
use tracing::level_filters::LevelFilter;

#[derive(Debug, clap::Args)]


+ 16
- 16
binaries/cli/src/command/mod.rs View File

@@ -16,22 +16,22 @@ mod up;

pub use run::run_func;

use build::Build;
use check::Check;
use coordinator::Coordinator;
use daemon::Daemon;
use destroy::Destroy;
use eyre::Context;
use graph::Graph;
use list::ListArgs;
use logs::LogsArgs;
use new::NewArgs;
use run::Run;
use runtime::Runtime;
use self_::SelfSubCommand;
use start::Start;
use stop::Stop;
use up::Up;
pub use build::Build;
pub use check::Check;
pub use coordinator::Coordinator;
pub use daemon::Daemon;
pub use destroy::Destroy;
pub use eyre::Context;
pub use graph::Graph;
pub use list::ListArgs;
pub use logs::LogsArgs;
pub use new::NewArgs;
pub use run::Run;
pub use runtime::Runtime;
pub use self_::SelfSubCommand;
pub use start::Start;
pub use stop::Stop;
pub use up::Up;

/// dora-rs cli client
#[derive(Debug, clap::Subcommand)]


+ 1
- 0
binaries/cli/src/command/run.rs View File

@@ -12,6 +12,7 @@ use crate::{
session::DataflowSession,
};
use dora_daemon::{flume, Daemon, LogDestination};
#[cfg(feature = "tracing")]
use dora_tracing::TracingBuilder;
use eyre::Context;
use tokio::runtime::Builder;


+ 8
- 8
binaries/cli/src/command/start/mod.rs View File

@@ -31,28 +31,28 @@ mod attach;
pub struct Start {
/// Path to the dataflow descriptor file
#[clap(value_name = "PATH")]
dataflow: String,
pub dataflow: String,
/// Assign a name to the dataflow
#[clap(long)]
name: Option<String>,
pub name: Option<String>,
/// Address of the dora coordinator
#[clap(long, value_name = "IP", default_value_t = LOCALHOST)]
coordinator_addr: IpAddr,
pub coordinator_addr: IpAddr,
/// Port number of the coordinator control server
#[clap(long, value_name = "PORT", default_value_t = DORA_COORDINATOR_PORT_CONTROL_DEFAULT)]
coordinator_port: u16,
pub coordinator_port: u16,
/// Attach to the dataflow and wait for its completion
#[clap(long, action)]
attach: bool,
pub attach: bool,
/// Run the dataflow in background
#[clap(long, action)]
detach: bool,
pub detach: bool,
/// Enable hot reloading (Python only)
#[clap(long, action)]
hot_reload: bool,
pub hot_reload: bool,
// Use UV to run nodes.
#[clap(long, action)]
uv: bool,
pub uv: bool,
}

impl Executable for Start {


+ 1
- 1
binaries/cli/src/lib.rs View File

@@ -5,7 +5,7 @@ use std::{
path::PathBuf,
};

mod command;
pub mod command;
mod common;
mod formatting;
pub mod output;


+ 78
- 0
examples/openai-realtime/README.md View File

@@ -0,0 +1,78 @@
# Dora-OpenAI-Realtime (ROOT Repo)

## Front End

### Build Client

```bash
git clone git@github.com:haixuanTao/moly.git --branch dora-backend-support
cd moly
cargo build --release
```

### Run Client

```bash
cd moly
cargo run -r
```

## Server

### Build server

```bash
uv venv --seed -p 3.11
source .venv/bin/activate
uv pip install dora-rs-cli dora-rs
dora build whisper-template-metal.yml --uv ## very long process
```

### Run server

```bash
source .venv/bin/activate
dora up
cargo run --release -p dora-openai-websocket
```

## On finish

```bash
dora destroy
```

## GUI

- Go to MolyServer Tab
- Add a custom Provider
- In API Host, use:

- Name: dora-websocket
- API Host: ws://127.0.0.1:8123
- Type: OpenAI Realtime

- Then go to Chat Tab
- New Chat
- (Make sure the server is running with: `cargo run --release -p dora-openai-websocket`)
- On bottom right, click on 🎧 icon.
> If nothing happens, the server could not be found.
- Click on start
- Wait for the first AI greeting
- Start speaking!
- You should get AI response!

### WIP: Moyoyo

## {Recommended} Install git-lfs

```bash
brew install git-lfs # MacOS
```

## Clone Moxin Voice Chat

```bash
git lfs install
git clone https://github.com/moxin-org/moxin-voice-chat.git
```

+ 54
- 0
examples/openai-realtime/whisper-template-metal.yml View File

@@ -0,0 +1,54 @@
nodes:
- id: NODE_ID
path: dynamic
inputs:
audio: tts/audio
text: stt/text
speech_started: stt/speech_started
outputs:
- audio
- text

- id: dora-vad
build: pip install -e ../../node-hub/dora-vad
path: dora-vad
inputs:
audio:
source: NODE_ID/audio
queue_size: 1000000
outputs:
- audio
env:
MIN_SPEECH_DURATION_MS: 1000
MIN_SILENCE_DURATION_MS: 1000
THRESHOLD: 0.5

- id: stt
build: pip install -e ../../node-hub/dora-distil-whisper
path: dora-distil-whisper
inputs:
audio: dora-vad/audio
outputs:
- text
- word
- speech_started

- id: llm
build: pip install -e ../../node-hub/dora-qwen
path: dora-qwen
inputs:
text: stt/text
text_to_audio: NODE_ID/text
outputs:
- text
env:
MODEL_NAME_OR_PATH: LLM_ID
MODEL_FILE_PATTERN: "*[qQ]6_[kK].[gG][gG][uU][fF]"

- id: tts
build: pip install -e ../../node-hub/dora-kokoro-tts
path: dora-kokoro-tts
inputs:
text: llm/text
outputs:
- audio

+ 77
- 35
node-hub/dora-distil-whisper/dora_distil_whisper/main.py View File

@@ -6,6 +6,7 @@ import sys
import time
from pathlib import Path

import numpy as np
import pyarrow as pa
import torch
from dora import Node
@@ -47,35 +48,6 @@ def remove_text_noise(text: str, text_noise="") -> str:
text_words = normalized_text.split()
noise_words = normalized_noise.split()

# Function to find and remove noise sequence flexibly
def remove_flexible(text_list, noise_list):
i = 0
while i <= len(text_list) - len(noise_list):
match = True
extra_words = 0
for j, noise_word in enumerate(noise_list):
if i + j + extra_words >= len(text_list):
match = False
break
# Allow skipping extra words in text_list
while (
i + j + extra_words < len(text_list)
and text_list[i + j + extra_words] != noise_word
):
extra_words += 1
if i + j + extra_words >= len(text_list):
match = False
break
if not match:
break
if match:
# Remove matched part
del text_list[i : i + len(noise_list) + extra_words]
i = max(0, i - len(noise_list)) # Adjust index after removal
else:
i += 1
return text_list

# Only remove parts of text_noise that are found in text
cleaned_words = text_words[:]
for noise_word in noise_words:
@@ -124,7 +96,30 @@ def load_model():
BAD_SENTENCES = [
"",
" so",
" So.",
" So, let's go.",
" so so",
" What?",
" We'll see you next time.",
" I'll see you next time.",
" We're going to come back.",
" let's move on.",
" Here we go.",
" my",
" All right. Thank you.",
" That's what we're doing.",
" That's what I wanted to do.",
" I'll be back.",
" Hold this. Hold this.",
" Hold this one. Hold this one.",
" And we'll see you next time.",
" strength.",
" Length.",
" Let's go.",
" Let's do it.",
"You",
"You ",
" You",
"字幕",
"字幕志愿",
"中文字幕",
@@ -181,13 +176,29 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):

def main():
"""TODO: Add docstring."""
node = Node()
text_noise = ""
noise_timestamp = time.time()
# For macos use mlx:
if sys.platform != "darwin":
pipe = load_model()
else:
import mlx_whisper

result = mlx_whisper.transcribe(
np.array([]),
path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".",
language=TARGET_LANGUAGE,
)
result = mlx_whisper.transcribe(
np.array([]),
path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".",
language=TARGET_LANGUAGE,
)

node = Node()
noise_timestamp = time.time()
cache_audio = None
for event in node:
if event["type"] == "INPUT":
if "text_noise" in event["id"]:
@@ -200,7 +211,12 @@ def main():
)
noise_timestamp = time.time()
else:
audio = event["value"].to_numpy()
audio_input = event["value"].to_numpy()
if cache_audio is not None:
audio = np.concatenate([cache_audio, audio_input])
else:
audio = audio_input

confg = (
{"language": TARGET_LANGUAGE, "task": "translate"}
if TRANSLATE
@@ -215,6 +231,7 @@ def main():
audio,
path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".",
language=TARGET_LANGUAGE,
)

else:
@@ -223,6 +240,8 @@ def main():
generate_kwargs=confg,
)
if result["text"] in BAD_SENTENCES:
print("Discarded text: ", result["text"])
# cache_audio = None
continue
text = cut_repetition(result["text"])

@@ -235,6 +254,29 @@ def main():

if text.strip() == "" or text.strip() == ".":
continue
node.send_output(
"text", pa.array([text]), {"language": TARGET_LANGUAGE},
)

if (
text.endswith(".")
or text.endswith("!")
or text.endswith("?")
or text.endswith('."')
or text.endswith('!"')
or text.endswith('?"')
) and not text.endswith("..."):
node.send_output(
"text",
pa.array([text]),
)
node.send_output(
"speech_started",
pa.array([text]),
)
cache_audio = None
audio = None
print("Text:", text)
elif text.endswith("..."):
print(
"Keeping audio in cache for next text output with punctuation"
)
print("Discarded text", text)
cache_audio = audio

+ 20
- 17
node-hub/dora-kokoro-tts/dora_kokoro_tts/main.py View File

@@ -1,4 +1,5 @@
"""TODO: Add docstring."""

import os
import re

@@ -8,11 +9,12 @@ from kokoro import KPipeline

LANGUAGE = os.getenv("LANGUAGE", "en")


def main():
"""TODO: Add docstring."""
if LANGUAGE in ["en", "english"]:
pipeline = KPipeline(lang_code="a")
elif LANGUAGE in ["zh","ch","chinese"]:
elif LANGUAGE in ["zh", "ch", "chinese"]:
pipeline = KPipeline(lang_code="z")
else:
print("warning: Defaulting to english speaker as language not found")
@@ -22,22 +24,23 @@ def main():

for event in node:
if event["type"] == "INPUT":
if event["id"] == "text":
text = event["value"][0].as_py()
if re.findall(r'[\u4e00-\u9fff]+', text):
pipeline = KPipeline(lang_code="z")
elif pipeline.lang_code != "a":
pipeline = KPipeline(lang_code="a") # <= make sure lang_code matches voice

generator = pipeline(
text,
voice="af_heart", # <= change voice here
speed=1.2,
split_pattern=r"\n+",
)
for _, (_, _, audio) in enumerate(generator):
audio = audio.numpy()
node.send_output("audio", pa.array(audio), {"sample_rate": 24000})
text = event["value"][0].as_py()
if re.findall(r"[\u4e00-\u9fff]+", text):
pipeline = KPipeline(lang_code="z")
elif pipeline.lang_code != "a":
pipeline = KPipeline(
lang_code="a"
) # <= make sure lang_code matches voice

generator = pipeline(
text,
voice="af_heart", # <= change voice here
speed=1.2,
split_pattern=r"\n+",
)
for _, (_, _, audio) in enumerate(generator):
audio = audio.numpy()
node.send_output("audio", pa.array(audio), {"sample_rate": 24000})


if __name__ == "__main__":


+ 5
- 1
node-hub/dora-kokoro-tts/pyproject.toml View File

@@ -5,13 +5,14 @@ authors = [{ name = "Your Name", email = "email@email.com" }]
description = "dora-kokoro-tts"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"

dependencies = [
"dora-rs >= 0.3.9",
"kokoro>=0.2.2",
"soundfile>=0.13.1",
"misaki[zh]",
"en-core-web-sm",
]

[dependency-groups]
@@ -31,3 +32,6 @@ extend-select = [
"N", # Ruff's N rule
"I", # Ruff's I rule
]

[tool.uv.sources]
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }

+ 31
- 0
node-hub/dora-openai-websocket/Cargo.toml View File

@@ -0,0 +1,31 @@
[package]
name = "dora-openai-websocket"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dora-node-api = { workspace = true }
dora-cli = { path = "../../binaries/cli", default-features = false }
tokio = { version = "1.25.0", features = ["full", "macros"] }
tokio-rustls = "0.24.0"
rustls-pemfile = "1.0"
hyper-util = { version = "0.1.0", features = ["tokio"] }
http-body-util = { version = "0.1.0" }
hyper = { version = "1", features = ["http1", "server", "client"] }
assert2 = "0.3.4"
trybuild = "1.0.106"
criterion = "0.4.0"
anyhow = "1.0.71"
webpki-roots = "0.23.0"
bytes = "1.4.0"
axum = "0.8.1"
fastwebsockets = { version = "0.10.0", features = ["upgrade"] }
serde_json = "1.0.141"
serde = "1.0.219"
base = "0.1.0"
base64 = "0.22.1"
rand = "0.9.2"
futures-util = "0.3.31"
futures-concurrency = "7.6.3"

+ 513
- 0
node-hub/dora-openai-websocket/src/main.rs View File

@@ -0,0 +1,513 @@
// Copyright 2023 Divy Srivastava <dj.srivastava23@gmail.com>
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use base64::engine::general_purpose;
use base64::Engine;
use dora_cli::command::Executable;
use dora_cli::command::Start;
use dora_node_api::arrow::array::AsArray;
use dora_node_api::arrow::datatypes::DataType;
use dora_node_api::dora_core::config::DataId;
use dora_node_api::dora_core::config::NodeId;
use dora_node_api::dora_core::topics::DORA_COORDINATOR_PORT_CONTROL_DEFAULT;
use dora_node_api::into_vec;
use dora_node_api::DoraNode;
use dora_node_api::IntoArrow;
use dora_node_api::MetadataParameters;
use fastwebsockets::upgrade;
use fastwebsockets::Frame;
use fastwebsockets::OpCode;
use fastwebsockets::Payload;
use fastwebsockets::WebSocketError;
use futures_concurrency::future::Race;
use futures_util::future;
use futures_util::future::Either;
use futures_util::FutureExt;
use http_body_util::Empty;
use hyper::body::Bytes;
use hyper::body::Incoming;
use hyper::server::conn::http1;
use hyper::service::service_fn;
use hyper::Request;
use hyper::Response;
use rand::random;
use serde;
use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::fs;
use std::io::{self, Write};
use std::net::IpAddr;
use std::net::Ipv4Addr;
use tokio::net::TcpListener;
/// Error payload carried by an OpenAI-Realtime-style `error` event.
#[derive(Serialize, Deserialize, Debug)]
pub struct ErrorDetails {
    /// Machine-readable error code, when provided.
    pub code: Option<String>,
    /// Human-readable description of the error.
    pub message: String,
    /// Name of the offending parameter, when applicable.
    pub param: Option<String>,
    /// Error category; serialized as `type`.
    #[serde(rename = "type")]
    pub error_type: Option<String>,
}

/// Client -> server messages for the subset of the OpenAI Realtime protocol
/// this bridge understands. The JSON `type` tag carries the event name.
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
pub enum OpenAIRealtimeMessage {
    /// Update (or initialize) the session configuration.
    #[serde(rename = "session.update")]
    SessionUpdate { session: SessionConfig },
    /// Append audio to the input buffer.
    #[serde(rename = "input_audio_buffer.append")]
    InputAudioBufferAppend {
        /// Base64-encoded audio payload.
        audio: String, // base64 encoded audio
    },
    /// Commit the buffered input audio for processing.
    #[serde(rename = "input_audio_buffer.commit")]
    InputAudioBufferCommit,
    /// Request generation of a model response.
    #[serde(rename = "response.create")]
    ResponseCreate { response: ResponseConfig },
    /// Add an item to the conversation history.
    #[serde(rename = "conversation.item.create")]
    ConversationItemCreate { item: ConversationItem },
    /// Truncate previously sent audio of a conversation item.
    #[serde(rename = "conversation.item.truncate")]
    ConversationItemTruncate {
        item_id: String,
        content_index: u32,
        /// Truncation point in milliseconds from the start of the audio.
        audio_end_ms: u32,
        #[serde(skip_serializing_if = "Option::is_none")]
        event_id: Option<String>,
    },
}

/// Session-level configuration sent by the client in `session.update`.
#[derive(Serialize, Deserialize, Debug)]
pub struct SessionConfig {
    /// Requested output modalities (e.g. "text", "audio").
    pub modalities: Vec<String>,
    /// System instructions for the model.
    pub instructions: String,
    /// Voice name for audio output.
    pub voice: String,
    /// LLM identifier; substituted as `LLM_ID` into the dataflow template.
    pub model: String,
    pub input_audio_format: String,
    pub output_audio_format: String,
    /// Transcription settings; the model name selects the dataflow template.
    pub input_audio_transcription: Option<TranscriptionConfig>,
    /// Optional server-side voice-activity / turn-detection parameters.
    pub turn_detection: Option<TurnDetectionConfig>,
    /// Tool definitions, kept as opaque JSON.
    pub tools: Vec<serde_json::Value>,
    pub tool_choice: String,
    pub temperature: f32,
    pub max_response_output_tokens: Option<u32>,
}

/// Input-audio transcription settings; only the model name is carried.
#[derive(Serialize, Deserialize, Debug)]
pub struct TranscriptionConfig {
    /// Transcription model name (used to pick `<model>-template-metal.yml`).
    pub model: String,
}

/// Voice-activity / turn-detection parameters, mirroring the Realtime API's
/// `turn_detection` session field.
#[derive(Serialize, Deserialize, Debug)]
pub struct TurnDetectionConfig {
    /// Detection algorithm name; serialized as `type`.
    #[serde(rename = "type")]
    pub detection_type: String,
    pub threshold: f32,
    pub prefix_padding_ms: u32,
    pub silence_duration_ms: u32,
    pub interrupt_response: bool,
    pub create_response: bool,
}

/// Per-response overrides sent with `response.create`. In this bridge,
/// `instructions` (when present) is forwarded to the dataflow as text input.
#[derive(Serialize, Deserialize, Debug)]
pub struct ResponseConfig {
    pub modalities: Vec<String>,
    pub instructions: Option<String>,
    pub voice: Option<String>,
    pub output_audio_format: Option<String>,
    pub tools: Option<Vec<serde_json::Value>>,
    pub tool_choice: Option<String>,
    pub temperature: Option<f32>,
    pub max_output_tokens: Option<u32>,
}

/// One item of the conversation history, with its role and content parts.
#[derive(Serialize, Deserialize, Debug)]
pub struct ConversationItem {
    pub id: Option<String>,
    /// Item kind; serialized as `type`.
    #[serde(rename = "type")]
    pub item_type: String,
    pub status: Option<String>,
    /// Speaker role (e.g. "user", "assistant").
    pub role: String,
    pub content: Vec<ContentPart>,
}

/// One content element of a conversation item, tagged by `type`.
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
pub enum ContentPart {
    /// Text supplied by the user.
    #[serde(rename = "input_text")]
    InputText { text: String },
    /// Audio supplied by the user (base64), with an optional transcript.
    #[serde(rename = "input_audio")]
    InputAudio {
        audio: String,
        transcript: Option<String>,
    },
    /// Text produced by the model.
    #[serde(rename = "text")]
    Text { text: String },
    /// Audio produced by the model (base64), with an optional transcript.
    #[serde(rename = "audio")]
    Audio {
        audio: String,
        transcript: Option<String>,
    },
}

/// Server -> client events (OpenAI Realtime style) that this bridge emits.
/// Unknown event types deserialize into `Other` via `#[serde(other)]`.
#[derive(Serialize, Deserialize, Debug)]
#[serde(tag = "type")]
pub enum OpenAIRealtimeResponse {
    /// Error report.
    #[serde(rename = "error")]
    Error { error: ErrorDetails },
    /// Session acknowledgement sent after the initial `session.update`.
    #[serde(rename = "session.created")]
    SessionCreated { session: serde_json::Value },
    #[serde(rename = "session.updated")]
    SessionUpdated { session: serde_json::Value },
    #[serde(rename = "conversation.item.created")]
    ConversationItemCreated { item: serde_json::Value },
    #[serde(rename = "conversation.item.truncated")]
    ConversationItemTruncated { item: serde_json::Value },
    /// Incremental chunk of generated audio.
    #[serde(rename = "response.audio.delta")]
    ResponseAudioDelta {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
        /// Base64-encoded PCM16 audio chunk.
        delta: String, // base64 encoded audio
    },
    /// End of the generated audio stream for one item.
    #[serde(rename = "response.audio.done")]
    ResponseAudioDone {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
    },
    /// Incremental chunk of generated text.
    #[serde(rename = "response.text.delta")]
    ResponseTextDelta {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
        delta: String,
    },
    /// Incremental chunk of the audio transcript.
    #[serde(rename = "response.audio_transcript.delta")]
    ResponseAudioTranscriptDelta {
        response_id: String,
        item_id: String,
        output_index: u32,
        content_index: u32,
        delta: String,
    },
    /// End of a full model response turn.
    #[serde(rename = "response.done")]
    ResponseDone { response: serde_json::Value },
    /// Voice activity detected in the input buffer.
    #[serde(rename = "input_audio_buffer.speech_started")]
    InputAudioBufferSpeechStarted {
        audio_start_ms: u32,
        item_id: String,
    },
    /// End of detected voice activity in the input buffer.
    #[serde(rename = "input_audio_buffer.speech_stopped")]
    InputAudioBufferSpeechStopped { audio_end_ms: u32, item_id: String },
    /// Catch-all for event types this bridge does not model.
    #[serde(other)]
    Other,
}

/// Decodes little-endian 16-bit PCM bytes into f32 samples scaled by 1/32767.
///
/// Pairs of bytes are consumed in order; a trailing odd byte is ignored.
fn convert_pcm16_to_f32(bytes: &[u8]) -> Vec<f32> {
    bytes
        .chunks_exact(2)
        .map(|pair| i16::from_le_bytes([pair[0], pair[1]]) as f32 / 32767.0)
        .collect()
}

/// Encodes f32 samples as little-endian 16-bit PCM bytes.
///
/// Each sample is limited to [-1.0, 1.0] before scaling by 32767.
/// (`max().min()` is kept rather than `clamp` so NaN inputs behave as before.)
fn convert_f32_to_pcm16(samples: &[f32]) -> Vec<u8> {
    samples
        .iter()
        .flat_map(|&sample| {
            let limited = sample.max(-1.0).min(1.0);
            ((limited * 32767.0) as i16).to_le_bytes()
        })
        .collect()
}

/// Reads `input_path`, replaces every occurrence of each placeholder key in
/// `replacement` with its mapped value, and writes the result to `output_path`.
///
/// # Arguments
///
/// * `input_path` - Path to the input file containing placeholder text.
/// * `replacement` - Map of placeholder text (e.g. "NODE_ID") to replacement text.
/// * `output_path` - Path to write the modified content to.
///
/// # Errors
///
/// Returns any I/O error raised while reading the input or writing the output.
fn replace_placeholder_in_file(
    input_path: &str,
    replacement: &HashMap<String, String>,
    output_path: &str,
) -> io::Result<()> {
    // Read the whole file into memory; templates are small.
    let mut content = fs::read_to_string(input_path)?;

    // Apply every placeholder -> replacement pair. Note that HashMap
    // iteration order is unspecified; later pairs see earlier results.
    for (placeholder, value) in replacement {
        content = content.replace(placeholder, value);
    }

    // fs::write creates (or truncates) the output file and writes all bytes,
    // replacing the previous File::create + write_all pair.
    fs::write(output_path, content.as_bytes())?;

    Ok(())
}

/// Bridges one WebSocket client speaking (a subset of) the OpenAI Realtime
/// protocol to a dedicated dora dataflow spawned for this connection.
///
/// Flow, as implemented here:
/// 1. The first frame must be a text `session.update`; it picks the
///    transcription model (default "moyoyo-whisper") and the LLM used to
///    fill the dataflow template `<model>-template-metal.yml`.
/// 2. The generated dataflow is started detached on the local coordinator,
///    and this process joins it as dynamic node `server-<random u16>`.
/// 3. A `session.created` acknowledgement is sent, then dora events and
///    client frames are raced in a loop until either side terminates.
///
/// # Errors
///
/// Returns `WebSocketError::InvalidConnectionHeader` when the handshake frame
/// is not a text `session.update`, and propagates WebSocket I/O errors.
/// Template, dataflow-start, and serde failures panic via `unwrap()`.
async fn handle_client(fut: upgrade::UpgradeFut) -> Result<(), WebSocketError> {
    let mut ws = fastwebsockets::FragmentCollector::new(fut.await?);

    // Handshake: require a text `session.update` as the very first frame.
    let frame = ws.read_frame().await?;
    if frame.opcode != OpCode::Text {
        return Err(WebSocketError::InvalidConnectionHeader);
    }
    let data: OpenAIRealtimeMessage = serde_json::from_slice(&frame.payload).unwrap();
    let OpenAIRealtimeMessage::SessionUpdate { session } = data else {
        return Err(WebSocketError::InvalidConnectionHeader);
    };

    // Transcription model defaults to "moyoyo-whisper" when none is supplied.
    let input_audio_transcription = session
        .input_audio_transcription
        .map_or("moyoyo-whisper".to_string(), |t| t.model);
    let llm = session.model.clone();
    // Random per-connection id isolates this session's node name and dataflow file.
    let id = random::<u16>();
    let node_id = format!("server-{id}");
    let dataflow = format!("{input_audio_transcription}-{}.yml", id);
    let template = format!("{input_audio_transcription}-template-metal.yml");
    let mut replacements = HashMap::new();
    replacements.insert("NODE_ID".to_string(), node_id.clone());
    replacements.insert("LLM_ID".to_string(), llm);
    println!("Filling template: {}", template);
    // Instantiate the per-connection dataflow file from the template.
    replace_placeholder_in_file(&template, &replacements, &dataflow).unwrap();
    // Start the dataflow detached on the local coordinator.
    dora_cli::command::Command::Start(Start {
        dataflow,
        name: Some(node_id.to_string()),
        coordinator_addr: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
        coordinator_port: DORA_COORDINATOR_PORT_CONTROL_DEFAULT,
        attach: false,
        detach: true,
        hot_reload: false,
        uv: true,
    })
    .execute()
    .unwrap();
    // Join the running dataflow as the dynamic node declared in the template.
    let (mut node, mut events) =
        DoraNode::init_from_node_id(NodeId::from(node_id.clone())).unwrap();
    // Acknowledge the session. The payload is a placeholder (`null`), not a
    // real session object.
    let serialized_data = OpenAIRealtimeResponse::SessionCreated {
        session: serde_json::Value::Null,
    };

    let payload =
        Payload::Bytes(Bytes::from(serde_json::to_string(&serialized_data).unwrap()).into());
    let frame = Frame::text(payload);
    ws.write_frame(frame).await?;
    loop {
        // Race the next dora event against the next client frame; whichever
        // completes first drives this iteration.
        let event_fut = events.recv_async().map(Either::Left);
        let frame_fut = ws.read_frame().map(Either::Right);
        let event_stream = (event_fut, frame_fut).race();
        // Set when a node output ends the current response turn; a trailing
        // `response.done` frame is then emitted after the delta frame.
        let mut finished = false;
        let frame = match event_stream.await {
            future::Either::Left(Some(ev)) => {
                let frame = match ev {
                    dora_node_api::Event::Input {
                        id,
                        metadata: _,
                        data,
                    } => {
                        if data.data_type() == &DataType::Utf8 {
                            // Text output -> transcript delta. The response/item
                            // ids and indices below are hard-coded placeholders.
                            let data = data.as_string::<i32>();
                            let str = data.value(0);
                            let serialized_data =
                                OpenAIRealtimeResponse::ResponseAudioTranscriptDelta {
                                    response_id: "123".to_string(),
                                    item_id: "123".to_string(),
                                    output_index: 123,
                                    content_index: 123,
                                    delta: str.to_string(),
                                };

                            let frame = Frame::text(Payload::Bytes(
                                Bytes::from(serde_json::to_string(&serialized_data).unwrap())
                                    .into(),
                            ));
                            frame
                        } else if id.contains("audio") {
                            // Audio output -> base64 PCM16 delta; ends the turn.
                            let data: Vec<f32> = into_vec(&data).unwrap();
                            let data = convert_f32_to_pcm16(&data);
                            let serialized_data = OpenAIRealtimeResponse::ResponseAudioDelta {
                                response_id: "123".to_string(),
                                item_id: "123".to_string(),
                                output_index: 123,
                                content_index: 123,
                                delta: general_purpose::STANDARD.encode(data),
                            };
                            finished = true;

                            let frame = Frame::text(Payload::Bytes(
                                Bytes::from(serde_json::to_string(&serialized_data).unwrap())
                                    .into(),
                            ));
                            frame
                        } else if id.contains("") {
                            // NOTE(review): `contains("")` is true for every id, so
                            // this branch catches all remaining inputs and the
                            // `unimplemented!()` arm below is unreachable — confirm
                            // the intended id filter.
                            let serialized_data =
                                OpenAIRealtimeResponse::InputAudioBufferSpeechStarted {
                                    audio_start_ms: 123,
                                    item_id: "123".to_string(),
                                };
                            finished = true;

                            let frame = Frame::text(Payload::Bytes(
                                Bytes::from(serde_json::to_string(&serialized_data).unwrap())
                                    .into(),
                            ));
                            frame
                        } else {
                            unimplemented!()
                        }
                    }
                    dora_node_api::Event::Error(_) => {
                        // Node errors are ignored; keep serving the connection.
                        // println!("Error in input: {}", s);
                        continue;
                    }
                    // Any other event (e.g. stop) ends the session loop.
                    _ => break,
                };
                Some(frame)
            }
            // Event channel closed: the dataflow is gone.
            future::Either::Left(None) => break,
            future::Either::Right(Ok(frame)) => {
                match frame.opcode {
                    OpCode::Close => break,
                    OpCode::Text | OpCode::Binary => {
                        let data: OpenAIRealtimeMessage =
                            serde_json::from_slice(&frame.payload).unwrap();

                        match data {
                            OpenAIRealtimeMessage::InputAudioBufferAppend { audio } => {
                                // println!("Received audio data: {}", audio);
                                let f32_data = audio;
                                // Decode base64 encoded audio data; skip empty payloads.
                                let f32_data = f32_data.trim();
                                if f32_data.is_empty() {
                                    continue;
                                }

                                if let Ok(f32_data) = general_purpose::STANDARD.decode(f32_data) {
                                    let f32_data = convert_pcm16_to_f32(&f32_data);
                                    // Downsample to 16 kHz from 24 kHz by dropping
                                    // every third sample (keeps 2/3 of samples).
                                    // NOTE(review): no low-pass filter is applied
                                    // before decimation — aliasing is possible.
                                    let f32_data = f32_data
                                        .into_iter()
                                        .enumerate()
                                        .filter(|(i, _)| i % 3 != 0)
                                        .map(|(_, v)| v)
                                        .collect::<Vec<f32>>();

                                    let mut parameter = MetadataParameters::default();
                                    parameter.insert(
                                        "sample_rate".to_string(),
                                        dora_node_api::Parameter::Integer(16000),
                                    );
                                    // Forward the audio into the dataflow.
                                    node.send_output(
                                        DataId::from("audio".to_string()),
                                        parameter,
                                        f32_data.into_arrow(),
                                    )
                                    .unwrap();
                                }
                            }
                            // A commit ends the session loop.
                            OpenAIRealtimeMessage::InputAudioBufferCommit => break,
                            OpenAIRealtimeMessage::ResponseCreate { response } => {
                                // Forward explicit instructions as text input.
                                if let Some(text) = response.instructions {
                                    node.send_output(
                                        DataId::from("text".to_string()),
                                        Default::default(),
                                        text.into_arrow(),
                                    )
                                    .unwrap();
                                }
                            }
                            _ => {}
                        }
                    }
                    // Other frame opcodes (ping/pong/continuation) end the loop.
                    _ => break,
                }
                None
            }
            // WebSocket read error: tear down the session.
            future::Either::Right(Err(_)) => break,
        };
        if let Some(frame) = frame {
            ws.write_frame(frame).await?;
        }
        if finished {
            // Close out the turn with a placeholder `response.done`.
            let serialized_data = OpenAIRealtimeResponse::ResponseDone {
                response: serde_json::Value::Null,
            };

            let payload = Payload::Bytes(
                Bytes::from(serde_json::to_string(&serialized_data).unwrap()).into(),
            );
            println!("Sending response done: {:?}", serialized_data);
            let frame = Frame::text(payload);
            ws.write_frame(frame).await?;
        };
    }

    Ok(())
}
/// HTTP handler that upgrades an incoming request to a WebSocket connection.
///
/// The client session is driven on a detached task; session errors are
/// logged to stderr rather than propagated to the HTTP layer.
async fn server_upgrade(
    mut req: Request<Incoming>,
) -> Result<Response<Empty<Bytes>>, WebSocketError> {
    let (response, upgrade_fut) = upgrade::upgrade(&mut req)?;

    tokio::task::spawn(async move {
        let session = tokio::task::unconstrained(handle_client(upgrade_fut));
        if let Err(err) = session.await {
            eprintln!("Error in websocket connection: {}", err);
        }
    });

    Ok(response)
}

/// Entry point: builds a multi-threaded Tokio runtime and serves WebSocket
/// upgrade requests on 127.0.0.1:8123, one HTTP/1.1 connection per task.
fn main() -> Result<(), WebSocketError> {
    const ADDR: &str = "127.0.0.1:8123";

    let runtime = tokio::runtime::Builder::new_multi_thread()
        .enable_io()
        .enable_time()
        .build()
        .unwrap();

    runtime.block_on(async move {
        let listener = TcpListener::bind(ADDR).await?;
        println!("Server started, listening on {}", ADDR);
        loop {
            let (stream, _) = listener.accept().await?;
            println!("Client connected");
            // Each connection gets its own task; upgrades are enabled so the
            // handler can switch the stream over to the WebSocket protocol.
            tokio::spawn(async move {
                let io = hyper_util::rt::TokioIo::new(stream);
                let serving = http1::Builder::new()
                    .serve_connection(io, service_fn(server_upgrade))
                    .with_upgrades();
                if let Err(err) = serving.await {
                    println!("An error occurred: {:?}", err);
                }
            });
        }
    })
}

+ 21
- 27
node-hub/dora-qwen/dora_qwen/main.py View File

@@ -1,7 +1,6 @@
"""TODO: Add docstring."""

import os
import sys

import pyarrow as pa
from dora import Node
@@ -12,14 +11,24 @@ SYSTEM_PROMPT = os.getenv(
"You're a very succinct AI assistant with short answers.",
)

MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", "Qwen/Qwen2.5-0.5B-Instruct-GGUF")
MODEL_FILE_PATTERN = os.getenv("MODEL_FILE_PATTERN", "*fp16.gguf")
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "512"))
N_GPU_LAYERS = int(os.getenv("N_GPU_LAYERS", "0"))
N_THREADS = int(os.getenv("N_THREADS", "4"))
CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "4096"))


def get_model_gguf():
"""TODO: Add docstring."""
from llama_cpp import Llama

return Llama.from_pretrained(
repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",
filename="*fp16.gguf",
repo_id=MODEL_NAME_OR_PATH,
filename=MODEL_FILE_PATTERN,
n_gpu_layers=N_GPU_LAYERS,
n_ctx=CONTEXT_SIZE,
n_threads=N_THREADS,
verbose=False,
)

@@ -71,12 +80,7 @@ def main():
"""TODO: Add docstring."""
history = []
# If OS is not Darwin, use Huggingface model
if sys.platform == "darwin":
model = get_model_gguf()
elif sys.platform == "linux":
model, tokenizer = get_model_huggingface()
else:
model, tokenizer = get_model_darwin()
model = get_model_gguf()

node = Node()

@@ -89,24 +93,14 @@ def main():
if len(ACTIVATION_WORDS) == 0 or any(
word in ACTIVATION_WORDS for word in words
):
# On linux, Windows
if sys.platform == "darwin":
response = model.create_chat_completion(
messages=[{"role": "user", "content": text}], # Prompt
max_tokens=24,
)["choices"][0]["message"]["content"]
elif sys.platform == "linux":
response, history = generate_hf(model, tokenizer, text, history)
else:
from mlx_lm import generate

response = generate(
model,
tokenizer,
prompt=text,
verbose=False,
max_tokens=50,
)
history += [{"role": "user", "content": text}]

response = model.create_chat_completion(
messages=history, # Prompt
max_tokens=24,
)["choices"][0]["message"]["content"]

history += [{"role": "assistant", "content": response}]

node.send_output(
output_id="text",


+ 15
- 4
node-hub/dora-vad/dora_vad/main.py View File

@@ -36,21 +36,32 @@ def main():
threshold=THRESHOLD,
min_speech_duration_ms=MIN_SPEECH_DURATION_MS,
min_silence_duration_ms=MIN_SILENCE_DURATION_MS,
sampling_rate=sr,
)

if len(speech_timestamps) == 0:
# If there is no speech, return the audio
continue
arg_max = np.argmax([ts["end"] - ts["start"] for ts in speech_timestamps])
# Check if there is a timestamp
if (
len(speech_timestamps) > 0
and len(audio) > MIN_AUDIO_SAMPLING_DURATION_MS * sr / 1000
and len(
audio[speech_timestamps[0]["start"] : speech_timestamps[-1]["end"]]
)
> MIN_AUDIO_SAMPLING_DURATION_MS * sr / 1000
and (
(len(audio) - speech_timestamps[arg_max]["end"])
> MIN_SILENCE_DURATION_MS / 1000 * sr * 5
)
):
# Check that the audio is not cut off at the end, and only return after enough time has elapsed
if speech_timestamps[-1]["end"] == len(audio):
node.send_output(
"timestamp_start",
pa.array([speech_timestamps[-1]["start"]]),
metadata={"sample_rate": sr},
)
continue
audio = audio[0 : speech_timestamps[-1]["end"]]
audio = audio[: speech_timestamps[-1]["end"]]
node.send_output("audio", pa.array(audio), metadata={"sample_rate": sr})
last_audios = [audio[speech_timestamps[-1]["end"] :]]



Loading…
Cancel
Save