Browse Source

[to #42322933]feat: split speech-signal-process task to subtasks

master
bin.xue 3 years ago
parent
commit
558cf01d57
6 changed files with 33 additions and 9 deletions
  1. +2
    -1
      modelscope/models/audio/ans/frcrn.py
  2. +2
    -0
      modelscope/outputs.py
  3. +1
    -1
      modelscope/pipelines/audio/ans_pipeline.py
  4. +1
    -1
      modelscope/pipelines/audio/linear_aec_pipeline.py
  5. +2
    -0
      modelscope/utils/constant.py
  6. +25
    -6
      tests/pipelines/test_speech_signal_process.py

+ 2
- 1
modelscope/models/audio/ans/frcrn.py View File

@@ -59,7 +59,8 @@ class FTB(nn.Module):


@MODELS.register_module(
- Tasks.speech_signal_process, module_name=Models.speech_frcrn_ans_cirm_16k)
+ Tasks.acoustic_noise_suppression,
+ module_name=Models.speech_frcrn_ans_cirm_16k)
class FRCRNModel(TorchModel):
r""" A decorator of FRCRN for integrating into modelscope framework """



+ 2
- 0
modelscope/outputs.py View File

@@ -300,6 +300,8 @@ TASK_OUTPUTS = {
# "output_pcm": np.array with shape(samples,) and dtype float32
# }
Tasks.speech_signal_process: [OutputKeys.OUTPUT_PCM],
+ Tasks.acoustic_echo_cancellation: [OutputKeys.OUTPUT_PCM],
+ Tasks.acoustic_noise_suppression: [OutputKeys.OUTPUT_PCM],

# ============ multi-modal tasks ===================



+ 1
- 1
modelscope/pipelines/audio/ans_pipeline.py View File

@@ -27,7 +27,7 @@ def audio_norm(x):


@PIPELINES.register_module(
- Tasks.speech_signal_process,
+ Tasks.acoustic_noise_suppression,
module_name=Pipelines.speech_frcrn_ans_cirm_16k)
class ANSPipeline(Pipeline):
r"""ANS (Acoustic Noise Suppression) Inference Pipeline .


+ 1
- 1
modelscope/pipelines/audio/linear_aec_pipeline.py View File

@@ -48,7 +48,7 @@ def initialize_config(module_cfg):


@PIPELINES.register_module(
- Tasks.speech_signal_process,
+ Tasks.acoustic_echo_cancellation,
module_name=Pipelines.speech_dfsmn_aec_psm_16k)
class LinearAECPipeline(Pipeline):
r"""AEC Inference Pipeline only support 16000 sample rate.


+ 2
- 0
modelscope/utils/constant.py View File

@@ -77,6 +77,8 @@ class AudioTasks(object):
auto_speech_recognition = 'auto-speech-recognition'
text_to_speech = 'text-to-speech'
speech_signal_process = 'speech-signal-process'
+ acoustic_echo_cancellation = 'acoustic-echo-cancellation'
+ acoustic_noise_suppression = 'acoustic-noise-suppression'


class MultiModalTasks(object):


+ 25
- 6
tests/pipelines/test_speech_signal_process.py View File

@@ -31,7 +31,7 @@ class SpeechSignalProcessTest(unittest.TestCase):
def setUp(self) -> None:
pass

- @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_aec(self):
# Download audio files
download(NEAREND_MIC_URL, NEAREND_MIC_FILE)
@@ -42,33 +42,52 @@ class SpeechSignalProcessTest(unittest.TestCase):
'farend_speech': FAREND_SPEECH_FILE
}
aec = pipeline(
- Tasks.speech_signal_process,
+ Tasks.acoustic_echo_cancellation,
model=model_id,
pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
output_path = os.path.abspath('output.wav')
aec(input, output_path=output_path)
print(f'Processed audio saved to {output_path}')

- @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
+ def test_aec_bytes(self):
+ # Download audio files
+ download(NEAREND_MIC_URL, NEAREND_MIC_FILE)
+ download(FAREND_SPEECH_URL, FAREND_SPEECH_FILE)
+ model_id = 'damo/speech_dfsmn_aec_psm_16k'
+ input = {}
+ with open(NEAREND_MIC_FILE, 'rb') as f:
+ input['nearend_mic'] = f.read()
+ with open(FAREND_SPEECH_FILE, 'rb') as f:
+ input['farend_speech'] = f.read()
+ aec = pipeline(
+ Tasks.acoustic_echo_cancellation,
+ model=model_id,
+ pipeline_name=Pipelines.speech_dfsmn_aec_psm_16k)
+ output_path = os.path.abspath('output.wav')
+ aec(input, output_path=output_path)
+ print(f'Processed audio saved to {output_path}')
+
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ans(self):
# Download audio files
download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE)
model_id = 'damo/speech_frcrn_ans_cirm_16k'
ans = pipeline(
- Tasks.speech_signal_process,
+ Tasks.acoustic_noise_suppression,
model=model_id,
pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k)
output_path = os.path.abspath('output.wav')
ans(NOISE_SPEECH_FILE, output_path=output_path)
print(f'Processed audio saved to {output_path}')

- @unittest.skipUnless(test_level() >= 2, 'skip test in current test level')
+ @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ans_bytes(self):
# Download audio files
download(NOISE_SPEECH_URL, NOISE_SPEECH_FILE)
model_id = 'damo/speech_frcrn_ans_cirm_16k'
ans = pipeline(
- Tasks.speech_signal_process,
+ Tasks.acoustic_noise_suppression,
model=model_id,
pipeline_name=Pipelines.speech_frcrn_ans_cirm_16k)
output_path = os.path.abspath('output.wav')


Loading…
Cancel
Save