From db7c5d14941e6de0982cfbd6573424b8db9cfc96 Mon Sep 17 00:00:00 2001
From: "jiangyu.xzy"
Date: Mon, 5 Dec 2022 09:59:55 +0800
Subject: [PATCH] asr unified interface, support conformer and uniasr models
 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10964641
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* support new asr paraformer model
* support asr conformer model
* add new asr model tests
* fix format
* support new input params
* fix conflict
* type fix
* fix conflict
---
 .../pipelines/audio/asr_inference_pipeline.py | 33 +++++++++++++------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/modelscope/pipelines/audio/asr_inference_pipeline.py b/modelscope/pipelines/audio/asr_inference_pipeline.py
index db23b06f..137d3ceb 100644
--- a/modelscope/pipelines/audio/asr_inference_pipeline.py
+++ b/modelscope/pipelines/audio/asr_inference_pipeline.py
@@ -124,6 +124,15 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
             frontend_conf = None
             if 'frontend_conf' in root:
                 frontend_conf = root['frontend_conf']
+            token_num_relax = None
+            if 'token_num_relax' in root:
+                token_num_relax = root['token_num_relax']
+            decoding_ind = None
+            if 'decoding_ind' in root:
+                decoding_ind = root['decoding_ind']
+            decoding_mode = None
+            if 'decoding_mode' in root:
+                decoding_mode = root['decoding_mode']

             cmd['beam_size'] = root['beam_size']
             cmd['penalty'] = root['penalty']
@@ -138,6 +147,9 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
             cmd['frontend_conf'] = frontend_conf
             if frontend_conf is not None and 'fs' in frontend_conf:
                 cmd['fs']['model_fs'] = frontend_conf['fs']
+            cmd['token_num_relax'] = token_num_relax
+            cmd['decoding_ind'] = decoding_ind
+            cmd['decoding_mode'] = decoding_mode

         elif self.framework == Frameworks.tf:
             cmd['fs']['model_fs'] = inputs['model_config']['fs']
@@ -234,16 +246,14 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
     def run_inference(self, cmd):
         asr_result = []
         if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr':
-            if cmd['mode'] == 'asr':
-                from funasr.bin import asr_inference_modelscope as asr_inference
-            else:
-                from funasr.bin import asr_inference_paraformer_modelscope as asr_inference
+            from funasr.bin import asr_inference_launch

-            if hasattr(asr_inference, 'set_parameters'):
-                asr_inference.set_parameters(sample_rate=cmd['fs'])
-                asr_inference.set_parameters(language=cmd['lang'])
+            if hasattr(asr_inference_launch, 'set_parameters'):
+                asr_inference_launch.set_parameters(sample_rate=cmd['fs'])
+                asr_inference_launch.set_parameters(language=cmd['lang'])

-            asr_result = asr_inference.asr_inference(
+            asr_result = asr_inference_launch.inference_launch(
+                mode=cmd['mode'],
                 batch_size=cmd['batch_size'],
                 maxlenratio=cmd['maxlenratio'],
                 minlenratio=cmd['minlenratio'],
@@ -253,13 +263,16 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
                 lm_weight=cmd['lm_weight'],
                 penalty=cmd['penalty'],
                 log_level=cmd['log_level'],
-                name_and_type=cmd['name_and_type'],
+                data_path_and_name_and_type=cmd['name_and_type'],
                 audio_lists=cmd['audio_in'],
                 asr_train_config=cmd['asr_train_config'],
                 asr_model_file=cmd['asr_model_file'],
                 lm_file=cmd['lm_file'],
                 lm_train_config=cmd['lm_train_config'],
-                frontend_conf=cmd['frontend_conf'])
+                frontend_conf=cmd['frontend_conf'],
+                token_num_relax=cmd['token_num_relax'],
+                decoding_ind=cmd['decoding_ind'],
+                decoding_mode=cmd['decoding_mode'])
         elif self.framework == Frameworks.torch:
             from easyasr import asr_inference_paraformer_espnet
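
Usage sketch (not part of the diff): after this change, run_inference()
routes every FunASR-backed model through the single
asr_inference_launch.inference_launch() entry point, with cmd['mode']
selecting the decoding path, so paraformer, conformer and uniasr models are
all driven the same way from the public pipeline API. A minimal example,
assuming a published FunASR model id and a local wav file (both are
illustrative, not taken from this patch):

    from modelscope.pipelines import pipeline
    from modelscope.utils.constant import Tasks

    # Builds AutomaticSpeechRecognitionPipeline; its run_inference() now
    # forwards mode, token_num_relax, decoding_ind and decoding_mode to
    # funasr.bin.asr_inference_launch.inference_launch().
    asr = pipeline(
        task=Tasks.auto_speech_recognition,
        model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch')

    result = asr(audio_in='example.wav')  # assumed 16 kHz mono wav
    print(result)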