Make whisper better by making it output punctuation

7 months ago · 709e8fec0a
--- a/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
+++ b/node-hub/dora-distil-whisper/dora_distil_whisper/main.py
@@ -6,6 +6,7 @@ import sys
 import time
 from pathlib import Path

 import numpy as np
 import pyarrow as pa
 import torch
 from dora import Node
@@ -125,6 +126,8 @@ BAD_SENTENCES = [
    "",
    " so",
    " so so",
    "You",
    "You ",
    "字幕",
    "字幕志愿",
    "中文字幕",
@@ -181,13 +184,14 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):

 def main():
    """TODO: Add docstring."""
    node = Node()
    text_noise = ""
    noise_timestamp = time.time()
    # On macOS the MLX backend is used instead, so only load the pipeline elsewhere:
    if sys.platform != "darwin":
        pipe = load_model()

    node = Node()
    noise_timestamp = time.time()
    cache_audio = None
    for event in node:
        if event["type"] == "INPUT":
            if "text_noise" in event["id"]:
@@ -200,7 +204,12 @@ def main():
                )
                noise_timestamp = time.time()
            else:
                audio = event["value"].to_numpy()
                audio_input = event["value"].to_numpy()
                if cache_audio is not None:
                    audio = np.concatenate([cache_audio, audio_input])
                else:
                    audio = audio_input

                confg = (
                    {"language": TARGET_LANGUAGE, "task": "translate"}
                    if TRANSLATE
@@ -215,6 +224,7 @@ def main():
                        audio,
                        path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
                        append_punctuations=".",
                        language=TARGET_LANGUAGE,
                    )

                else:
@@ -235,6 +245,22 @@ def main():

                if text.strip() == "" or text.strip() == ".":
                    continue
                node.send_output(
                    "text", pa.array([text]), {"language": TARGET_LANGUAGE},
                )

                if (
                    (
                        text.endswith(".")
                        or text.endswith("!")
                        or text.endswith("?")
                        or text.endswith('."')
                        or text.endswith('!"')
                        or text.endswith('?"')
                    )
                    and not text.endswith("...")  # Avoid ending with ellipsis
                ):
                    node.send_output(
                        "text",
                        pa.array([text]),
                    )
                    cache_audio = None
                else:
                    cache_audio = audio