From 4c59df0342ef394407c22c2843b93dfd0161079c Mon Sep 17 00:00:00 2001
From: haixuantao
Date: Mon, 13 Jan 2025 23:17:01 +0100
Subject: [PATCH] Adding outtetts as TTS

---
 node-hub/dora-outtetts/README.md              |  37 ++++++
 .../dora-outtetts/dora_outtetts/__init__.py   |  11 ++
 .../dora-outtetts/dora_outtetts/__main__.py   |   5 +
 node-hub/dora-outtetts/dora_outtetts/main.py  | 132 ++++++++++++++++++
 .../dora_outtetts/tests/test_main.py          |  14 +++
 node-hub/dora-outtetts/pyproject.toml         |  32 ++++++
 6 files changed, 231 insertions(+)
 create mode 100644 node-hub/dora-outtetts/README.md
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/__init__.py
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/__main__.py
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/main.py
 create mode 100644 node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
 create mode 100644 node-hub/dora-outtetts/pyproject.toml

diff --git a/node-hub/dora-outtetts/README.md b/node-hub/dora-outtetts/README.md
new file mode 100644
index 00000000..72d8b916
--- /dev/null
+++ b/node-hub/dora-outtetts/README.md
@@ -0,0 +1,37 @@
+# dora-outtetts
+
+## Getting started
+
+- Install it with pip:
+
+```bash
+pip install -e .
+```
+
+## Contribution Guide
+
+- Format with [black](https://github.com/psf/black):
+
+```bash
+black . # Format
+```
+
+- Lint with [pylint](https://github.com/pylint-dev/pylint):
+
+```bash
+pylint --disable=C,R --ignored-modules=cv2 . # Lint
+```
+
+- Test with [pytest](https://github.com/pytest-dev/pytest)
+
+```bash
+pytest . # Test
+```
+
+## YAML Specification
+
+## Examples
+
+## License
+
+dora-outtetts's code is released under the MIT License
diff --git a/node-hub/dora-outtetts/dora_outtetts/__init__.py b/node-hub/dora-outtetts/dora_outtetts/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-outtetts/dora_outtetts/__main__.py b/node-hub/dora-outtetts/dora_outtetts/__main__.py
new file mode 100644
index 00000000..bcbfde6d
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/__main__.py
@@ -0,0 +1,5 @@
+from .main import main
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-outtetts/dora_outtetts/main.py b/node-hub/dora-outtetts/dora_outtetts/main.py
new file mode 100644
index 00000000..52680356
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/main.py
@@ -0,0 +1,132 @@
+from dora import Node
+import pyarrow as pa
+import outetts
+import argparse  # Add argparse import
+import pathlib
+import os
+import torch
+
+# Where the speaker profile is saved to and loaded from.
+PATH_SPEAKER = os.getenv("PATH_SPEAKER", "speaker.json")
+
+# GGUF weights default to the Hugging Face cache of the current user; set the
+# PATH_MODEL environment variable to point at another .gguf file.
+PATH_MODEL = os.getenv(
+    "PATH_MODEL",
+    str(
+        pathlib.Path.home()
+        / ".cache/huggingface/hub/models--OuteAI--OuteTTS-0.2-500M-GGUF"
+        / "snapshots/e6d78720d2a8edce2bc8f5c5c2d0332e57091930"
+        / "OuteTTS-0.2-500M-Q4_0.gguf"
+    ),
+)
+
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+device = "cuda:0" if torch.cuda.is_available() else device
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
+
+def load_interface():
+    """Build the OuteTTS interface selected by the INTERFACE environment variable.
+
+    ``INTERFACE=HF`` loads the Hugging Face model on the detected torch device;
+    any other value loads the GGUF weights located at ``PATH_MODEL``.
+    """
+    if os.getenv("INTERFACE", "") == "HF":
+        model_config = outetts.HFModelConfig_v1(
+            model_path="OuteAI/OuteTTS-0.2-500M",
+            language="en",
+            device=device,
+        )
+        return outetts.InterfaceHF(model_version="0.2", cfg=model_config)
+
+    model_config = outetts.GGUFModelConfig_v1(
+        model_path=PATH_MODEL,
+        language="en",  # Supported languages in v0.2: en, zh, ja, ko
+        n_gpu_layers=0,
+    )
+    return outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)
+
+
+def create_speaker(interface, path):
+    """Create a speaker profile from the audio file at ``path``.
+
+    The profile is saved to ``PATH_SPEAKER``, the same location ``main`` loads
+    it from.
+    """
+    speaker = interface.create_speaker(
+        audio_path=path,
+        # If transcript is not provided, it will be automatically transcribed using Whisper
+        transcript=None,  # Set to None to use Whisper for transcription
+        whisper_model="turbo",  # Optional: specify Whisper model (default: "turbo")
+        whisper_device=None,  # Optional: specify device for Whisper (default: None)
+    )
+    interface.save_speaker(speaker, PATH_SPEAKER)
+
+    print(f"saved {PATH_SPEAKER}")
+
+
+def main(arg_list: list[str] | None = None):
+    """Run the node, or one of its command-line helper modes.
+
+    ``--test`` runs the bundled pytest suite and ``--create-speaker AUDIO``
+    saves a speaker profile; both return without starting the node. Otherwise
+    every ``text`` input is synthesized, saved as ``output_{i}.wav`` and played.
+    """
+    # Parse cli args
+    parser = argparse.ArgumentParser(description="Dora Outetts Node")
+    parser.add_argument("--create-speaker", type=str, help="Path to audio file")
+    parser.add_argument("--test", action="store_true", help="Run tests")
+    args = parser.parse_args(arg_list)
+    if args.test:
+        import pytest
+
+        path = pathlib.Path(__file__).parent.resolve()
+        # pytest.main takes its arguments as strings
+        pytest.main(["-x", str(path / "tests")])
+
+        return
+
+    interface = load_interface()
+
+    if args.create_speaker:
+        create_speaker(interface, args.create_speaker)
+        return
+
+    if os.path.exists(PATH_SPEAKER):
+        print(f"Loading speaker from {PATH_SPEAKER}")
+        speaker = interface.load_speaker(PATH_SPEAKER)
+    else:
+        # Load default speaker
+        speaker = interface.load_default_speaker(name="male_1")
+
+    node = Node()
+    i = 0
+
+    for event in node:
+        if event["type"] == "INPUT":
+            if event["id"] == "TICK":
+                print(
+                    f"""Node received:
+                id: {event["id"]},
+                value: {event["value"]},
+                metadata: {event["metadata"]}"""
+                )
+
+            elif event["id"] == "text":
+                # The text to speak is the first element of the input array.
+                text = event["value"][0].as_py()
+                print(text)
+                output = interface.generate(
+                    text=text,
+                    temperature=0.1,
+                    repetition_penalty=1.1,
+                    speaker=speaker,  # Optional: speaker profile
+                )
+                i += 1
+                output.save(f"output_{i}.wav")
+                output.play()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-outtetts/dora_outtetts/tests/test_main.py b/node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
new file mode 100644
index 00000000..ea6f137c
--- /dev/null
+++ b/node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
@@ -0,0 +1,14 @@
+import pytest
+
+from dora_outtetts.main import load_interface
+from dora_outtetts.main import main
+
+
+def test_import_main():
+    with pytest.raises(RuntimeError):
+        main([])
+
+
+def test_load_interface():
+    interface = load_interface()
+    assert interface is not None
diff --git a/node-hub/dora-outtetts/pyproject.toml b/node-hub/dora-outtetts/pyproject.toml
new file mode 100644
index 00000000..e87e71d0
--- /dev/null
+++ b/node-hub/dora-outtetts/pyproject.toml
@@ -0,0 +1,32 @@
+[project]
+name = "dora-outtetts"
+version = "0.0.0"
+authors = []
+license = "MIT License"
+description = "dora-outtetts"
+readme = "README.md"
+requires-python = ">=3.10"
+
+[tool.poetry]
+homepage = "https://github.com/dora-rs/dora.git"
+documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-outtetts/README.md"
+packages = [{ include = "dora_outtetts" }]
+
+[tool.poetry.dependencies]
+dora-rs = "^0.3.6"
+numpy = "< 2.0.0"
+pyarrow = ">= 5.0.0"
+python = "^3.10"
+outetts = "^0.2.3"
+
+[tool.poetry.dev-dependencies]
+pytest = ">= 6.3.4"
+pylint = ">= 3.3.2"
+black = ">= 22.10"
+
+[tool.poetry.scripts]
+dora-outtetts = "dora_outtetts.main:main"
+
+[build-system]
+requires = ["poetry-core>=1.8.0"]
+build-backend = "poetry.core.masonry.api"