Browse Source

Try to remove as much noise as possible within phi4

tags/v0.3.11-rc1
haixuanTao 10 months ago
parent
commit
ee7e72e300
2 changed files with 28 additions and 2 deletions
  1. +27
    -1
      node-hub/dora-phi4/dora_phi4/main.py
  2. +1
    -1
      node-hub/dora-pyaudio/dora_pyaudio/main.py

+ 27
- 1
node-hub/dora-phi4/dora_phi4/main.py View File

@@ -70,17 +70,39 @@ LEAD_MODALITY = os.getenv("LEAD_MODALITY", "text")
# Sentences treated as noise: a response that is exactly equal to one of these
# entries is skipped by the caller (`response in BAD_SENTENCES`). Matching is
# exact, so casing and punctuation variants are listed separately.
#
# NOTE: every entry must end with a comma. Two adjacent string literals without
# a comma between them are silently concatenated by Python into one long string
# that never matches any real response.
BAD_SENTENCES = [
    "The stock market closed down by 0.1%.",
    "The stock market closed down by 0.1 percent.",
    "The stock market closed down by one.",
    "The market is closed on Monday and Tuesday.",
    "The market is closed on Mondays and Tuesdays.",
    "the first is the of the internet communicate people",
    "The first time I saw the movie, I was very impressed.",
    "The first one is the one that is the most important.",
    "The first one is the one that is the most common.",
    "The first time I saw the sea, I was very young.",
    "The first time I saw the sea was when I was a child.",
    "The sound of the wind is so loud.",
    "The first time I saw the sea.",
    "the first time saw the sea i was so happy",
    "The first time I saw the sea was in the movie.",
    "The first time I saw the movie was in the theater.",
    "The first time I saw the movie.",
    "the first i saw the video i was very impressed",
    "the first time saw the video i was like my god",
    "i am a student at the university of toronto",
    "I don't know what to do.",
    "I don't know.",
    "translator elisabeth buffard reviewer denise rq",
    "Translator Elisabeth Buffard Reviewer Denise RQ.",
    "Translator Denise RQ Reviewer Denise RQ.",
    "the company also has a presence the united states canada brazil argentina mexico colombia peru chile uruguay",
    "the company also has a presence the united states canada brazil mexico argentina colombia chile peru and uruguay",
    "the company also has a presence the united states canada brazil argentina mexico and the united kingdom",
    "the company also presence in the united states canada brazil argentina mexico colombia peru chile uruguay",
    "The company also has a presence in the United States, Canada, Brazil, Argentina, Mexico, Colombia, Peru, Chile, and Uruguay.",
    "the company also has presence in the united states canada the united kingdom",
    "company also has a presence the united states canada brazil argentina colombia mexico peru chile and uruguay",
    "the company also has a presence the united states canada and the united kingdom",
    "the company also announced that it will acquire the online retailer zappos",
    "the company has a market capitalization of 15 trillion yen",
]


@@ -264,6 +286,10 @@ def main():

if response in BAD_SENTENCES:
continue
if "company also has a presence" in response:
continue
elif "The first time I saw the" in response:
continue
## Remove text noise independently of casing
response = remove_text_noise(response, text_noise)
if response.strip() == "" or response.strip() == ".":


+ 1
- 1
node-hub/dora-pyaudio/dora_pyaudio/main.py View File

@@ -41,7 +41,7 @@ def main():
sr = SAMPLE_RATE
i = 0
while True:
event = node.next(timeout=0.01)
event = node.next(timeout=0.005)
if event is None:
break
if event["type"] == "INPUT":


Loading…
Cancel
Save