From 558cf01d5725742342c6aba621d9d701f11ccccb Mon Sep 17 00:00:00 2001 From: "bin.xue" Date: Mon, 1 Aug 2022 20:56:32 +0800 Subject: [PATCH] [to #42322933]feat: split speech-signal-process task to subtasks --- modelscope/models/audio/ans/frcrn.py | 3 +- modelscope/outputs.py | 2 ++ modelscope/pipelines/audio/ans_pipeline.py | 2 +- .../pipelines/audio/linear_aec_pipeline.py | 2 +- modelscope/utils/constant.py | 2 ++ tests/pipelines/test_speech_signal_process.py | 31 +++++++++++++++---- 6 files changed, 33 insertions(+), 9 deletions(-) diff --git a/modelscope/models/audio/ans/frcrn.py b/modelscope/models/audio/ans/frcrn.py index cc580117..38e4d720 100644 --- a/modelscope/models/audio/ans/frcrn.py +++ b/modelscope/models/audio/ans/frcrn.py @@ -59,7 +59,8 @@ class FTB(nn.Module): @MODELS.register_module( - Tasks.speech_signal_process, module_name=Models.speech_frcrn_ans_cirm_16k) + Tasks.acoustic_noise_suppression, + module_name=Models.speech_frcrn_ans_cirm_16k) class FRCRNModel(TorchModel): r""" A decorator of FRCRN for integrating into modelscope framework """ diff --git a/modelscope/outputs.py b/modelscope/outputs.py index 10333855..4d596472 100644 --- a/modelscope/outputs.py +++ b/modelscope/outputs.py @@ -300,6 +300,8 @@ TASK_OUTPUTS = { # "output_pcm": np.array with shape(samples,) and dtype float32 # } Tasks.speech_signal_process: [OutputKeys.OUTPUT_PCM], + Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM], + Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM], # ============ multi-modal tasks =================== diff --git a/modelscope/pipelines/audio/ans_pipeline.py b/modelscope/pipelines/audio/ans_pipeline.py index 13d25934..38cb0043 100644 --- a/modelscope/pipelines/audio/ans_pipeline.py +++ b/modelscope/pipelines/audio/ans_pipeline.py @@ -27,7 +27,7 @@ def audio_norm(x): @PIPELINES.register_module( - Tasks.speech_signal_process, + Tasks.acoustic_noise_suppression, module_name=Pipelines.speech_frcrn_ans_cirm_16k) class ANSPipeline(Pipeline): r"""ANS (Acoustic Noise Suppression) Inference Pipeline . diff --git a/modelscope/pipelines/audio/linear_aec_pipeline.py b/modelscope/pipelines/audio/linear_aec_pipeline.py index a73f2e58..ad5f6a3a 100644 --- a/modelscope/pipelines/audio/linear_aec_pipeline.py +++ b/modelscope/pipelines/audio/linear_aec_pipeline.py @@ -48,7 +48,7 @@ def initialize_config(module_cfg): @PIPELINES.register_module( - Tasks.speech_signal_process, + Tasks.acoustic_echo_cancellation, module_name=Pipelines.speech_dfsmn_aec_psm_16k) class LinearAECPipeline(Pipeline): r"""AEC Inference Pipeline only support 16000 sample rate. diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index ec829eaf..87a282ca 100644 --- a/modelscope/utils/constant.py +++ b/modelscope/utils/constant.py @@ -77,6 +77,8 @@ class AudioTasks(object): auto_speech_recognition = 'auto-speech-recognition' text_to_speech = 'text-to-speech' speech_signal_process = 'speech-signal-process' + acoustic_echo_cancellation = 'acoustic-echo-cancellation' + acoustic_noise_suppression = 'acoustic-noise-suppression' class MultiModalTasks(object): diff --git a/tests/pipelines/test_speech_signal_process.py b/tests/pipelines/test_speech_signal_process.py index 3bcf7f52..22dac2b6 100644 --- a/tests/pipelines/test_speech_signal_process.py +++ b/tests/pipelines/test_speech_signal_process.py @@ -31,7 +31,7 @@ class SpeechSignalProcessTest(unittest.TestCase): def setUp(self) -> None: pass - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_aec(self): # Download audio files download(NEAREND_MIC_URL, NEAREND_MIC_FILE) @@ -42,33 +42,52 @@ class SpeechSignalProcessTest(unittest.TestCase): 'farend_speech': FAREND_SPEECH_FILE } aec = pipeline( - Tasks.speech_signal_process, + Tasks.acoustic_echo_cancellation, model=model_id, pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k) output_path = os.path.abspath('output.wav') aec(input, output_path=output_path) print(f'Processed audio saved to {output_path}') - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') + def test_aec_bytes(self): + # Download audio files + download(NEAREND_MIC_URL, NEAREND_MIC_FILE) + download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE) + model_id = 'damo/speech_dfsmn_aec_psm_16k' + input = {} + with open(NEAREND_MIC_FILE, 'rb') as f: + input['nearend_mic'] = f.read() + with open(FAREND_SPEECH_FILE, 'rb') as f: + input['farend_speech'] = f.read() + aec = pipeline( + Tasks.acoustic_echo_cancellation, + model=model_id, + pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k) + output_path = os.path.abspath('output.wav') + aec(input, output_path=output_path) + print(f'Processed audio saved to {output_path}') + + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ans(self): # Download audio files download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE) model_id = 'damo/speech_frcrn_ans_cirm_16k' ans = pipeline( - Tasks.speech_signal_process, + Tasks.acoustic_noise_suppression, model=model_id, pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k) output_path = os.path.abspath('output.wav') ans(NOISE_SPEECH_FILE, output_path=output_path) print(f'Processed audio saved to {output_path}') - @unittest.skipUnless(test_level() >= 2, 'skip test in current test level') + @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ans_bytes(self): # Download audio files download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE) model_id = 'damo/speech_frcrn_ans_cirm_16k' ans = pipeline( - Tasks.speech_signal_process, + Tasks.acoustic_noise_suppression, model=model_id, pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k) output_path = os.path.abspath('output.wav')