Browse Source

Adding outtetts as TTS

tags/v0.3.9-rc1
haixuantao 1 year ago
parent
commit
4c59df0342
6 changed files with 207 additions and 0 deletions
  1. +37
    -0
      node-hub/dora-outtetts/README.md
  2. +11
    -0
      node-hub/dora-outtetts/dora_outtetts/__init__.py
  3. +5
    -0
      node-hub/dora-outtetts/dora_outtetts/__main__.py
  4. +108
    -0
      node-hub/dora-outtetts/dora_outtetts/main.py
  5. +14
    -0
      node-hub/dora-outtetts/dora_outtetts/tests/test_main.py
  6. +32
    -0
      node-hub/dora-outtetts/pyproject.toml

+ 37
- 0
node-hub/dora-outtetts/README.md View File

@@ -0,0 +1,37 @@
# dora-outtetts

## Getting started

- Install it with pip:

```bash
pip install -e .
```

## Contribution Guide

- Format with [black](https://github.com/psf/black):

```bash
black . # Format
```

- Lint with [pylint](https://github.com/pylint-dev/pylint):

```bash
pylint --disable=C,R --ignored-modules=cv2 . # Lint
```

- Test with [pytest](https://github.com/pytest-dev/pytest):

```bash
pytest . # Test
```

## YAML Specification

## Examples

## License

dora-outtetts's code is released under the MIT License.

+ 11
- 0
node-hub/dora-outtetts/dora_outtetts/__init__.py View File

@@ -0,0 +1,11 @@
import os

# Location of README.md, which sits one directory above this package.
readme_path = os.path.join(
    os.path.dirname(os.path.dirname(__file__)),
    "README.md",
)

# Use the README text as the package docstring, or a placeholder string when
# the file does not exist.
try:
    f = open(readme_path, "r", encoding="utf-8")
except FileNotFoundError:
    __doc__ = "README file not found."
else:
    with f:
        __doc__ = f.read()

+ 5
- 0
node-hub/dora-outtetts/dora_outtetts/__main__.py View File

@@ -0,0 +1,5 @@
from .main import main


# Run the node when this package is executed as a script
# (``python -m dora_outtetts``).
if __name__ == "__main__":
    main()

+ 108
- 0
node-hub/dora-outtetts/dora_outtetts/main.py View File

@@ -0,0 +1,108 @@
from dora import Node
import pyarrow as pa
import outetts
import argparse # Add argparse import
import pathlib
import os
import torch

# File the speaker profile is looked up at; overridable via the environment.
PATH_SPEAKER = os.getenv("PATH_SPEAKER", "speaker.json")

# Pick the compute device: CUDA takes precedence over MPS, CPU is the fallback.
device = "cpu"
if torch.backends.mps.is_available():
    device = "mps"
if torch.cuda.is_available():
    device = "cuda:0"
    # Half precision is only selected when CUDA is available.
    torch_dtype = torch.float16
else:
    torch_dtype = torch.float32


def load_interface():
    """Build the OuteTTS interface selected by the ``INTERFACE`` env variable.

    When ``INTERFACE`` equals ``"HF"``, the ``OuteAI/OuteTTS-0.2-500M`` model
    is configured with ``outetts.HFModelConfig_v1`` on the module-level
    ``device`` and wrapped in ``outetts.InterfaceHF``. Any other value
    (including unset) selects the GGUF backend via ``outetts.InterfaceGGUF``.

    The GGUF model file is taken from the ``PATH_GGUF_MODEL`` environment
    variable (``~`` is expanded). When the variable is unset, the previously
    hard-coded location is used so existing setups keep working.

    Returns:
        The constructed OuteTTS interface object.
    """
    if os.getenv("INTERFACE", "") == "HF":
        model_config = outetts.HFModelConfig_v1(
            model_path="OuteAI/OuteTTS-0.2-500M",
            language="en",
            device=device,
        )
        return outetts.InterfaceHF(model_version="0.2", cfg=model_config)

    # The historical default points into one developer's Hugging Face cache;
    # it is kept only as a fallback. Set PATH_GGUF_MODEL on any other machine.
    default_gguf_path = (
        "/Users/xaviertao/.cache/huggingface/hub/"
        "models--OuteAI--OuteTTS-0.2-500M-GGUF/snapshots/"
        "e6d78720d2a8edce2bc8f5c5c2d0332e57091930/OuteTTS-0.2-500M-Q4_0.gguf"
    )
    gguf_path = os.path.expanduser(os.getenv("PATH_GGUF_MODEL", default_gguf_path))

    model_config = outetts.GGUFModelConfig_v1(
        model_path=gguf_path,
        language="en",  # Supported languages in v0.2: en, zh, ja, ko
        n_gpu_layers=0,
    )
    return outetts.InterfaceGGUF(model_version="0.2", cfg=model_config)


def create_speaker(interface, path):
    """Create a speaker profile from an audio file and save it to ``PATH_SPEAKER``.

    Args:
        interface: OuteTTS interface object; its ``create_speaker`` and
            ``save_speaker`` methods are called.
        path: Path to the audio file the speaker profile is built from.

    Returns:
        None. The profile is written to ``PATH_SPEAKER`` and a confirmation
        line is printed.
    """
    speaker = interface.create_speaker(
        audio_path=path,
        # If transcript is not provided, it will be automatically transcribed using Whisper
        transcript=None,  # Set to None to use Whisper for transcription
        whisper_model="turbo",  # Optional: specify Whisper model (default: "turbo")
        whisper_device=None,  # Optional: specify device for Whisper (default: None)
    )
    # Save to the same location main() checks for an existing profile, so a
    # custom PATH_SPEAKER is honoured instead of always writing "speaker.json".
    interface.save_speaker(speaker, PATH_SPEAKER)

    print(f"saved {PATH_SPEAKER}")


def main(arg_list: list[str] | None = None):
    """Entry point of the dora-outtetts node.

    Behaviour depends on the command-line flags:

    * ``--test``: run the package's ``tests`` directory with pytest and return.
    * ``--create-speaker AUDIO``: build a speaker profile from ``AUDIO`` via
      ``create_speaker`` and return.
    * otherwise: start a dora ``Node`` and, for every ``text`` input, generate
      speech, save it as ``output_<n>.wav`` and play it.

    Args:
        arg_list: Arguments to parse instead of ``sys.argv[1:]``; ``None``
            lets argparse read the process arguments.
    """
    # Parse cli args
    parser = argparse.ArgumentParser(description="Dora Outetts Node")
    parser.add_argument("--create-speaker", type=str, help="Path to audio file")
    parser.add_argument("--test", action="store_true", help="Run tests")
    args = parser.parse_args(arg_list)
    if args.test:
        import pytest

        path = pathlib.Path(__file__).parent.resolve()
        # pytest.main expects string arguments, so the Path is converted.
        pytest.main(["-x", str(path / "tests")])

        return

    interface = load_interface()

    if args.create_speaker:
        create_speaker(interface, args.create_speaker)
        return

    if os.path.exists(PATH_SPEAKER):
        # Actually load the saved profile; previously the default speaker was
        # used here even though the message claimed otherwise.
        print(f"Loading speaker from {PATH_SPEAKER}")
        speaker = interface.load_speaker(PATH_SPEAKER)
    else:
        # Load default speaker
        speaker = interface.load_default_speaker(name="male_1")

    node = Node()
    i = 0  # Counter used to number the generated wav files.

    for event in node:
        if event["type"] == "INPUT":
            if event["id"] == "TICK":
                print(
                    f"""Node received:
id: {event["id"]},
value: {event["value"]},
metadata: {event["metadata"]}"""
                )

            elif event["id"] == "text":
                # The first element of the input array is the text to speak.
                text = event["value"][0].as_py()
                print(text)
                output = interface.generate(
                    text=text,
                    temperature=0.1,
                    repetition_penalty=1.1,
                    speaker=speaker,  # Optional: speaker profile
                )
                i += 1
                output.save(f"output_{i}.wav")
                output.play()


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()

+ 14
- 0
node-hub/dora-outtetts/dora_outtetts/tests/test_main.py View File

@@ -0,0 +1,14 @@
import pytest

from dora_outtetts.main import load_interface
from dora_outtetts.main import main


def test_import_main():
    """Check that calling ``main`` with no CLI arguments raises RuntimeError.

    NOTE(review): presumably the error comes from creating the dora ``Node``
    outside of a running dataflow — confirm.
    """
    expect_runtime_error = pytest.raises(RuntimeError)
    with expect_runtime_error:
        main([])


def test_load_interface():
    """Check that ``load_interface`` returns an interface object."""
    assert load_interface() is not None

+ 32
- 0
node-hub/dora-outtetts/pyproject.toml View File

@@ -0,0 +1,32 @@
# Packaging metadata for the dora-outtetts node.
# NOTE(review): metadata is split between [project] and [tool.poetry] while the
# build backend is poetry-core; confirm the backend version in use reads both.
[project]
name = "dora-outtetts"
version = "0.0.0"
authors = []
license = "MIT License"
description = "dora-outtetts"
readme = "README.md"
requires-python = ">=3.10"

[tool.poetry]
homepage = "https://github.com/dora-rs/dora.git"
documentation = "https://github.com/dora-rs/dora/blob/main/node-hub/dora-outtetts/README.md"
packages = [{ include = "dora_outtetts" }]

# NOTE(review): dora_outtetts/main.py imports torch directly, but torch is not
# listed here; presumably it is installed transitively via outetts — confirm.
[tool.poetry.dependencies]
dora-rs = "^0.3.6"
numpy = "< 2.0.0"
pyarrow = ">= 5.0.0"
python = "^3.10"
outetts = "^0.2.3"

[tool.poetry.dev-dependencies]
pytest = ">= 6.3.4"
pylint = ">= 3.3.2"
black = ">= 22.10"

# Console command `dora-outtetts` that calls main() in dora_outtetts/main.py.
[tool.poetry.scripts]
dora-outtetts = "dora_outtetts.main:main"

[build-system]
requires = ["poetry-core>=1.8.0"]
build-backend = "poetry.core.masonry.api"

Loading…
Cancel
Save