Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9285266 Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9285266 master
| @@ -9,7 +9,8 @@ from modelscope.models.audio.tts.am import SambertNetHifi16k | |||
| from modelscope.models.audio.tts.vocoder import Hifigan16k | |||
| from modelscope.pipelines.base import Pipeline | |||
| from modelscope.pipelines.builder import PIPELINES | |||
| from modelscope.preprocessors import TextToTacotronSymbols, build_preprocessor | |||
| from modelscope.preprocessors import (Preprocessor, TextToTacotronSymbols, | |||
| build_preprocessor) | |||
| from modelscope.utils.constant import Fields, Tasks | |||
| __all__ = ['TextToSpeechSambertHifigan16kPipeline'] | |||
| @@ -20,19 +21,19 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline'] | |||
| class TextToSpeechSambertHifigan16kPipeline(Pipeline): | |||
| def __init__(self, | |||
| config_file: str = None, | |||
| model: List[Model] = None, | |||
| preprocessor: TextToTacotronSymbols = None, | |||
| model: List[str] = None, | |||
| preprocessor: Preprocessor = None, | |||
| **kwargs): | |||
| super().__init__( | |||
| config_file=config_file, | |||
| model=model, | |||
| preprocessor=preprocessor, | |||
| **kwargs) | |||
| assert len(model) == 2, 'model number should be 2' | |||
| self._am = model[0] | |||
| self._vocoder = model[1] | |||
| self._preprocessor = preprocessor | |||
| assert len(model) == 3, 'model number should be 3' | |||
| if preprocessor is None: | |||
| lang_type = 'pinyin' | |||
| if 'lang_type' in kwargs: | |||
| lang_type = kwargs['lang_type'] | |||
| preprocessor = TextToTacotronSymbols(model[0], lang_type=lang_type) | |||
| models = [model[1], model[2]] | |||
| super().__init__(model=models, preprocessor=preprocessor, **kwargs) | |||
| self._am = self.models[0] | |||
| self._vocoder = self.models[1] | |||
| def forward(self, inputs: Dict[str, Any]) -> Dict[str, np.ndarray]: | |||
| texts = inputs['texts'] | |||
| @@ -1,6 +1,5 @@ | |||
| import unittest | |||
| import tensorflow as tf | |||
| # NOTICE: TensorFlow 1.15 does not seem to be fully compatible with PyTorch. | |||
| # A segmentation fault may be raised by the PyTorch C++ library | |||
| # if 'import tensorflow' comes before 'import torch'. | |||
| @@ -16,6 +15,8 @@ from modelscope.utils.constant import Fields, Tasks | |||
| from modelscope.utils.logger import get_logger | |||
| from modelscope.utils.test_utils import test_level | |||
| import tensorflow as tf # isort:skip | |||
| logger = get_logger() | |||
| @@ -23,33 +24,14 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase): | |||
| @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') | |||
| def test_pipeline(self): | |||
| lang_type = 'pinyin' | |||
| text = '明天天气怎么样' | |||
| preprocessor_model_id = 'damo/speech_binary_tts_frontend_resource' | |||
| am_model_id = 'damo/speech_sambert16k_tts_zhitian_emo' | |||
| voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo' | |||
| cfg_preprocessor = dict( | |||
| type=Preprocessors.text_to_tacotron_symbols, | |||
| model_name=preprocessor_model_id, | |||
| lang_type=lang_type) | |||
| preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio) | |||
| self.assertTrue(preprocessor is not None) | |||
| am = Model.from_pretrained(am_model_id) | |||
| self.assertTrue(am is not None) | |||
| voc = Model.from_pretrained(voc_model_id) | |||
| self.assertTrue(voc is not None) | |||
| sambert_tts = pipeline( | |||
| task=Tasks.text_to_speech, | |||
| pipeline_name=Pipelines.sambert_hifigan_16k_tts, | |||
| config_file='', | |||
| model=[am, voc], | |||
| preprocessor=preprocessor) | |||
| model=[preprocessor_model_id, am_model_id, voc_model_id]) | |||
| self.assertTrue(sambert_tts is not None) | |||
| output = sambert_tts(text) | |||
| self.assertTrue(len(output['output']) > 0) | |||
| write('output.wav', 16000, output['output']) | |||