Browse Source

Make whisper better by making it output punctuation

make-qwen-llm-configurable
haixuantao 5 months ago
parent
commit
709e8fec0a
1 changed file with 32 additions and 6 deletions
  1. +32
    -6
      node-hub/dora-distil-whisper/dora_distil_whisper/main.py

+ 32
- 6
node-hub/dora-distil-whisper/dora_distil_whisper/main.py View File

@@ -6,6 +6,7 @@ import sys
import time
from pathlib import Path

import numpy as np
import pyarrow as pa
import torch
from dora import Node
@@ -125,6 +126,8 @@ BAD_SENTENCES = [
"",
" so",
" so so",
"You",
"You ",
"字幕",
"字幕志愿",
"中文字幕",
@@ -181,13 +184,14 @@ def cut_repetition(text, min_repeat_length=4, max_repeat_length=50):

def main():
"""TODO: Add docstring."""
node = Node()
text_noise = ""
noise_timestamp = time.time()
# On macOS, MLX is used instead, so the pipeline is only loaded on other platforms:
if sys.platform != "darwin":
pipe = load_model()

node = Node()
noise_timestamp = time.time()
cache_audio = None
for event in node:
if event["type"] == "INPUT":
if "text_noise" in event["id"]:
@@ -200,7 +204,12 @@ def main():
)
noise_timestamp = time.time()
else:
audio = event["value"].to_numpy()
audio_input = event["value"].to_numpy()
if cache_audio is not None:
audio = np.concatenate([cache_audio, audio_input])
else:
audio = audio_input

confg = (
{"language": TARGET_LANGUAGE, "task": "translate"}
if TRANSLATE
@@ -215,6 +224,7 @@ def main():
audio,
path_or_hf_repo="mlx-community/whisper-large-v3-turbo",
append_punctuations=".",
language=TARGET_LANGUAGE,
)

else:
@@ -235,6 +245,22 @@ def main():

if text.strip() == "" or text.strip() == ".":
continue
node.send_output(
"text", pa.array([text]), {"language": TARGET_LANGUAGE},
)

if (
(
text.endswith(".")
or text.endswith("!")
or text.endswith("?")
or text.endswith('."')
or text.endswith('!"')
or text.endswith('?"')
)
and not text.endswith("...") # Avoid ending with ellipsis
):
node.send_output(
"text",
pa.array([text]),
)
cache_audio = None
else:
cache_audio = audio

Loading…
Cancel
Save