From 4c59df0342ef394407c22c2843b93dfd0161079c Mon Sep 17 00:00:00 2001
From: haixuantao <tao.xavier@outlook.com>
Date: Mon, 13 Jan 2025 23:17:01 +0100
Subject: [PATCH] Adding outtetts as TTS

---
 node-hub/dora-outtetts/README.md              |  37 ++++++
 .../dora-outtetts/dora_outtetts/__init__.py   |  11 ++
 .../dora-outtetts/dora_outtetts/__main__.py   |   5 +
 node-hub/dora-outtetts/dora_outtetts/main.py  | 108 ++++++++++++++++++
 .../dora_outtetts/tests/test_main.py          |  14 +++
 node-hub/dora-outtetts/pyproject.toml         |  32 ++++++
 6 files changed, 207 insertions(+)
 create mode 100644 node-hub/dora-outtetts/README.md
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/__init__.py
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/__main__.py
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/main.py
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
 create mode 100644 node-hub/dora-outtetts/pyproject.toml

diff --git a/node-hub/dora-outtetts/README.md b/node-hub/dora-outtetts/README.md
new file mode 100644
index 00000000..72d8b916
--- /dev/null
+++ b/node-hub/dora-outtetts/README.md
@@ -0,0 +1,37 @@
+# dora-outtetts
+
+## Getting started
+
+- Install it with pip:
+
+```bash
+pip install -e .
+```
+
+## Contribution Guide
+
+- Format with [black](https://github.com/psf/black):
+
+```bash
+black . # Format
+```
+
+- Lint with [pylint](https://github.com/pylint-dev/pylint):
+
+```bash
+pylint --disable=C,R --ignored-modules=cv2 . # Lint
+```
+
+- Test with [pytest](https://github.com/pytest-dev/pytest):
+
+```bash
+pytest . # Test
+```
+
+## YAML Specification
+
+## Examples
+
+## License
+
+dora-outtetts's code is released under the MIT License.
diff --git a/node-hub/dora-outtetts/dora_outtetts/__init__.py b/node-hub/dora-outtetts/dora_outtetts/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# README.md is expected one directory above this package directory (dirname applied twice).
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Use the README text as the package docstring; fall back to a placeholder when it is missing.
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-outtetts/dora_outtetts/__main__.py b/node-hub/dora-outtetts/dora_outtetts/__main__.py
new file mode 100644
index 00000000..bcbfde6d
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+# Entry point for `python -m dora_outtetts`: delegate to the CLI main() of this package.
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-outtetts/dora_outtetts/main.py b/node-hub/dora-outtetts/dora_outtetts/main.py
new file mode 100644
index 00000000..52680356
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/main.py
@@ -0,0 +1,108 @@
+from dora import Node
+import pyarrow as pa
+import outetts
+import argparse  # Add argparse import
+import pathlib
+import os
+import torch
+
+PATH_SPEAKER = os.getenv("PATH_SPEAKER", "speaker.json")  # speaker profile path, overridable via env
+
+device = "mps" if torch.backends.mps.is_available() else "cpu"  # MPS when available, else CPU
+device = "cuda:0" if torch.cuda.is_available() else device  # CUDA, when available, overrides the above
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32  # not referenced in this module
+
+
+def load_interface():
+    if os.getenv("INTERFACE", "") == "HF":
+        model_config = outetts.HFModelConfig_v1(
+            model_path="OuteAI/OuteTTS-0.2-500M",
+            language="en",
+            device=device,
+        )
+
+        interface = outetts.InterfaceHF(model_version="0.2", cfg=model_config)
+    else:
+        model_config = outetts.GGUFModelConfig_v1(
+            model_path="/Users/xaviertao/.cache/huggingface/hub/models--OuteAI--OuteTTS-0.2-500M-GGUF/snapshots/e6d78720d2a8edce2bc8f5c5c2d0332e57091930/OuteTTS-0.2-500M-Q4_0.gguf",
+            language="en",  # Supported languages in v0.2: en, zh, ja, ko
+            n_gpu_layers=0,
+        )
+
+        interface = outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
+
+    return interface
+
+
+def create_speaker(interface, path):
+    speaker = interface.create_speaker(
+        audio_path=path,
+        # If transcript is not provided, it will be automatically transcribed using Whisper
+        transcript=None,  # Set to None to use Whisper for transcription
+        whisper_model="turbo",  # Optional: specify Whisper model (default: "turbo")
+        whisper_device=None,  # Optional: specify device for Whisper (default: None)
+    )
+    interface.save_speaker(speaker, "speaker.json")
+
+    print("saved speaker.json")
+    return
+
+
+def main(arg_list: list[str] | None = None):
+    # Parse cli args
+    parser = argparse.ArgumentParser(description="Dora Outetts Node")
+    parser.add_argument("--create-speaker", type=str, help="Path to audio file")
+    parser.add_argument("--test", action="store_true", help="Run tests")
+    args = parser.parse_args(arg_list)
+    if args.test:
+        import pytest
+
+        path = pathlib.Path(__file__).parent.resolve()
+        pytest.main(["-x", path / "tests"])
+
+        return
+
+    interface = load_interface()
+
+    if args.create_speaker:
+        create_speaker(interface, args.create_speaker)
+        return
+
+    if os.path.exists(PATH_SPEAKER):
+        print(f"Loading speaker from {PATH_SPEAKER}")
+        # speaker = interface.load_speaker(PATH_SPEAKER)
+        speaker = interface.load_default_speaker(name="male_1")
+    else:
+        # Load default speaker
+        speaker = interface.load_default_speaker(name="male_1")
+
+    node = Node()
+    i = 0
+
+    for event in node:
+        if event["type"] == "INPUT":
+            if event["id"] == "TICK":
+                print(
+                    f"""Node received:
+                id: {event["id"]},
+                value: {event["value"]},
+                metadata: {event["metadata"]}"""
+                )
+
+            elif event["id"] == "text":
+                # Warning: Make sure to add my_output_id and my_input_id within the dataflow.
+                text = event["value"][0].as_py()
+                print(text)
+                output = interface.generate(
+                    text=text,
+                    temperature=0.1,
+                    repetition_penalty=1.1,
+                    speaker=speaker,  # Optional: speaker profile
+                )
+                i += 1
+                output.save(f"output_{i}.wav")
+                output.play()
+
+
+if __name__ == "__main__":  # allow running this module directly as a script
+    main()
diff --git a/node-hub/dora-outtetts/dora_outtetts/tests/test_main.py b/node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
new file mode 100644
index 00000000..ea6f137c
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
@@ -0,0 +1,14 @@
+import pytest
+
+from dora_outtetts.main import load_interface
+from dora_outtetts.main import main
+
+
+def test_import_main():
+    with pytest.raises(RuntimeError):  # presumably raised by Node() outside a dataflow — TODO confirm
+        main([])  # empty argv: neither --test nor --create-speaker, so the node path runs
+
+
+def test_load_interface():
+    interface = load_interface()  # builds the real interface (HF or GGUF, chosen by INTERFACE)
+    assert interface is not None
diff --git a/node-hub/dora-outtetts/pyproject.toml b/node-hub/dora-outtetts/pyproject.toml
new file mode 100644
index 00000000..e87e71d0
--- /dev/null
+++ b/node-hub/dora-outtetts/pyproject.toml
@@ -0,0 +1,32 @@
+[project]
+name = "dora-outtetts"
+version = "0.0.0"
+authors = []
+license = "MIT License"
+description = "dora-outtetts"
+readme = "README.md"
+requires-python = ">=3.10"
+
+[tool.poetry]
+homepage = "https://github.com/dora-rs/dora.git"
+documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-outtetts/README.md"
+packages = [{ include = "dora_outtetts" }]
+
+[tool.poetry.dependencies]
+dora-rs = "^0.3.6"
+numpy = "< 2.0.0"
+pyarrow = ">= 5.0.0"
+python = "^3.10"
+outetts = "^0.2.3"
+
+[tool.poetry.dev-dependencies]
+pytest = ">= 6.3.4"
+pylint = ">= 3.3.2"
+black = ">= 22.10"
+
+[tool.poetry.scripts]
+dora-outtetts = "dora_outtetts.main:main"
+
+[build-system]
+requires = ["poetry-core>=1.8.0"]
+build-backend = "poetry.core.masonry.api"