Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10919277 * support new asr paraformer model * support asr conformer model master^2
| @@ -110,6 +110,7 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): | |||||
| 'sampled_lengths': 'seq2seq/sampled_lengths', | 'sampled_lengths': 'seq2seq/sampled_lengths', | ||||
| 'lang': 'zh-cn', | 'lang': 'zh-cn', | ||||
| 'code_base': inputs['code_base'], | 'code_base': inputs['code_base'], | ||||
| 'mode': inputs['mode'], | |||||
| 'fs': { | 'fs': { | ||||
| 'audio_fs': inputs['audio_fs'], | 'audio_fs': inputs['audio_fs'], | ||||
| 'model_fs': 16000 | 'model_fs': 16000 | ||||
| @@ -233,15 +234,16 @@ class AutomaticSpeechRecognitionPipeline(Pipeline): | |||||
| def run_inference(self, cmd): | def run_inference(self, cmd): | ||||
| asr_result = [] | asr_result = [] | ||||
| if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr': | if self.framework == Frameworks.torch and cmd['code_base'] == 'funasr': | ||||
| from funasr.bin import asr_inference_paraformer_modelscope | |||||
| if cmd['mode'] == 'asr': | |||||
| from funasr.bin import asr_inference_modelscope as asr_inference | |||||
| else: | |||||
| from funasr.bin import asr_inference_paraformer_modelscope as asr_inference | |||||
| if hasattr(asr_inference_paraformer_modelscope, 'set_parameters'): | |||||
| asr_inference_paraformer_modelscope.set_parameters( | |||||
| sample_rate=cmd['fs']) | |||||
| asr_inference_paraformer_modelscope.set_parameters( | |||||
| language=cmd['lang']) | |||||
| if hasattr(asr_inference, 'set_parameters'): | |||||
| asr_inference.set_parameters(sample_rate=cmd['fs']) | |||||
| asr_inference.set_parameters(language=cmd['lang']) | |||||
| asr_result = asr_inference_paraformer_modelscope.asr_inference( | |||||
| asr_result = asr_inference.asr_inference( | |||||
| batch_size=cmd['batch_size'], | batch_size=cmd['batch_size'], | ||||
| maxlenratio=cmd['maxlenratio'], | maxlenratio=cmd['maxlenratio'], | ||||
| minlenratio=cmd['minlenratio'], | minlenratio=cmd['minlenratio'], | ||||
| @@ -103,6 +103,12 @@ class WavToScp(Preprocessor): | |||||
| else: | else: | ||||
| code_base = None | code_base = None | ||||
| inputs['code_base'] = code_base | inputs['code_base'] = code_base | ||||
| # decoding mode | |||||
| if 'mode' in inputs['model_config']: | |||||
| mode = inputs['model_config']['mode'] | |||||
| else: | |||||
| mode = None | |||||
| inputs['mode'] = mode | |||||
| if inputs['model_type'] == Frameworks.torch: | if inputs['model_type'] == Frameworks.torch: | ||||
| assert inputs['model_config'].__contains__( | assert inputs['model_config'].__contains__( | ||||
| @@ -111,8 +117,6 @@ class WavToScp(Preprocessor): | |||||
| 'am_model_config'), 'am_model_config does not exist' | 'am_model_config'), 'am_model_config does not exist' | ||||
| assert inputs['model_config'].__contains__( | assert inputs['model_config'].__contains__( | ||||
| 'asr_model_config'), 'asr_model_config does not exist' | 'asr_model_config'), 'asr_model_config does not exist' | ||||
| assert inputs['model_config'].__contains__( | |||||
| 'asr_model_wav_config'), 'asr_model_wav_config does not exist' | |||||
| am_model_config: str = os.path.join( | am_model_config: str = os.path.join( | ||||
| inputs['model_workspace'], | inputs['model_workspace'], | ||||
| @@ -127,9 +131,14 @@ class WavToScp(Preprocessor): | |||||
| assert os.path.exists( | assert os.path.exists( | ||||
| asr_model_config), 'asr_model_config does not exist' | asr_model_config), 'asr_model_config does not exist' | ||||
| asr_model_wav_config: str = os.path.join( | |||||
| inputs['model_workspace'], | |||||
| inputs['model_config']['asr_model_wav_config']) | |||||
| if 'asr_model_wav_config' in inputs['model_config']: | |||||
| asr_model_wav_config: str = os.path.join( | |||||
| inputs['model_workspace'], | |||||
| inputs['model_config']['asr_model_wav_config']) | |||||
| else: | |||||
| asr_model_wav_config: str = os.path.join( | |||||
| inputs['model_workspace'], | |||||
| inputs['model_config']['asr_model_config']) | |||||
| assert os.path.exists( | assert os.path.exists( | ||||
| asr_model_wav_config), 'asr_model_wav_config does not exist' | asr_model_wav_config), 'asr_model_wav_config does not exist' | ||||
| @@ -1,6 +1,6 @@ | |||||
| easyasr>=0.0.2 | easyasr>=0.0.2 | ||||
| espnet==202204 | espnet==202204 | ||||
| funasr>=0.1.0 | |||||
| funasr>=0.1.3 | |||||
| h5py | h5py | ||||
| inflect | inflect | ||||
| keras | keras | ||||
| @@ -217,6 +217,41 @@ class AutomaticSpeechRecognitionTest(unittest.TestCase, | |||||
| 'damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-offline', | 'damo/speech_UniASR_asr_2pass-id-16k-common-vocab1067-tensorflow1-offline', | ||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | 'wav_path': 'data/test/audios/asr_example_id.wav' | ||||
| }, | }, | ||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_conformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_conformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_paraformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_paraformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| { | |||||
| 'model_id': | |||||
| 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch', | |||||
| 'wav_path': 'data/test/audios/asr_example_id.wav' | |||||
| }, | |||||
| ] | ] | ||||
| def setUp(self) -> None: | def setUp(self) -> None: | ||||