Browse Source

[to #42322933] feat: AEC pipeline also accepts a tuple input; add test

master
bin.xue 3 years ago
parent
commit
b2be1abcad
2 changed files with 41 additions and 12 deletions
  1. +22
    -12
      modelscope/preprocessors/audio.py
  2. +19
    -0
      tests/pipelines/test_speech_signal_process.py

+ 22
- 12
modelscope/preprocessors/audio.py View File

@@ -1,12 +1,13 @@
import io import io
import os import os
from typing import Any, Dict
from typing import Any, Dict, Tuple, Union


import numpy as np import numpy as np
import scipy.io.wavfile as wav import scipy.io.wavfile as wav
import torch import torch


from modelscope.utils.constant import Fields from modelscope.utils.constant import Fields
from . import Preprocessor
from .builder import PREPROCESSORS from .builder import PREPROCESSORS




@@ -115,7 +116,7 @@ class Feature:




@PREPROCESSORS.register_module(Fields.audio) @PREPROCESSORS.register_module(Fields.audio)
class LinearAECAndFbank:
class LinearAECAndFbank(Preprocessor):
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000


def __init__(self, io_config): def __init__(self, io_config):
@@ -127,18 +128,27 @@ class LinearAECAndFbank:
self.mitaec = MinDAEC.load() self.mitaec = MinDAEC.load()
self.mask_on_mic = io_config['mask_on'] == 'nearend_mic' self.mask_on_mic = io_config['mask_on'] == 'nearend_mic'


def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
""" linear filtering the near end mic and far end audio, then extract the feature
:param data: dict with two keys and correspond audios: "nearend_mic" and "farend_speech"
:return: dict with two keys and Tensor values: "base" linear filtered audio,and "feature"
def __call__(self, data: Union[Tuple, Dict[str, Any]]) -> Dict[str, Any]:
""" Linear filtering the near end mic and far end audio, then extract the feature.

Args:
data: Either a tuple (nearend_mic, farend_speech), or a dict with the keys "nearend_mic" and "farend_speech" (optionally "nearend_speech") and the corresponding audio as values.

Returns:
Dict with two keys and Tensor values: "base" (the linear filtered audio) and "feature".
""" """
# read files
nearend_mic, fs = self.load_wav(data['nearend_mic'])
farend_speech, fs = self.load_wav(data['farend_speech'])
if 'nearend_speech' in data:
nearend_speech, fs = self.load_wav(data['nearend_speech'])
else:
if isinstance(data, tuple):
nearend_mic, fs = self.load_wav(data[0])
farend_speech, fs = self.load_wav(data[1])
nearend_speech = np.zeros_like(nearend_mic) nearend_speech = np.zeros_like(nearend_mic)
else:
# read files
nearend_mic, fs = self.load_wav(data['nearend_mic'])
farend_speech, fs = self.load_wav(data['farend_speech'])
if 'nearend_speech' in data:
nearend_speech, fs = self.load_wav(data['nearend_speech'])
else:
nearend_speech = np.zeros_like(nearend_mic)


out_mic, out_ref, out_linear, out_echo = self.mitaec.do_linear_aec( out_mic, out_ref, out_linear, out_echo = self.mitaec.do_linear_aec(
nearend_mic, farend_speech) nearend_mic, farend_speech)


+ 19
- 0
tests/pipelines/test_speech_signal_process.py View File

@@ -68,6 +68,25 @@ class SpeechSignalProcessTest(unittest.TestCase):
aec(input, output_path=output_path) aec(input, output_path=output_path)
print(f'Processed audio saved to {output_path}') print(f'Processed audio saved to {output_path}')


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_aec_tuple_bytes(self):
    """Run the AEC pipeline on a tuple of raw audio file bytes.

    The input is a 2-tuple ordered as (near-end mic bytes, far-end speech
    bytes) instead of the dict form; the pipeline is asked to write its
    result to ``output.wav`` in the current working directory.
    """
    # Fetch both sample recordings before anything else touches them.
    download(NEAREND_MIC_URL, NEAREND_MIC_FILE)
    download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE)
    model_id = 'damo/speech_dfsmn_aec_psm_16k'

    # Read each file fully into memory, keeping near-end first, far-end second.
    raw_audio = []
    for wav_file in (NEAREND_MIC_FILE, FAREND_SPEECH_FILE):
        with open(wav_file, 'rb') as stream:
            raw_audio.append(stream.read())
    inputs = tuple(raw_audio)

    aec = pipeline(
        Tasks.acoustic_echo_cancellation,
        model=model_id,
        pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
    output_path = os.path.abspath('output.wav')
    aec(inputs, output_path=output_path)
    print(f'Processed audio saved to {output_path}')

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ans(self): def test_ans(self):
# Download audio files # Download audio files


Loading…
Cancel
Save