[to #9285266] tts pipeline using model id params

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/9285266
3 years ago · 9b7e68b67e
--- a/modelscope/pipelines/audio/text_to_speech_pipeline.py
+++ b/modelscope/pipelines/audio/text_to_speech_pipeline.py
@@ -9,7 +9,8 @@ from modelscope.models.audio.tts.am import SambertNetHifi16k
 from modelscope.models.audio.tts.vocoder import Hifigan16k
 from modelscope.pipelines.base import Pipeline
 from modelscope.pipelines.builder import PIPELINES
 from modelscope.preprocessors import TextToTacotronSymbols, build_preprocessor
 from modelscope.preprocessors import (Preprocessor, TextToTacotronSymbols,
                                      build_preprocessor)
 from modelscope.utils.constant import Fields, Tasks

 __all__ = ['TextToSpeechSambertHifigan16kPipeline']
@@ -20,19 +21,19 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']
 class TextToSpeechSambertHifigan16kPipeline(Pipeline):

    def __init__(self,
                 model: List[str] = None,
                 preprocessor: Preprocessor = None,
                 **kwargs):
        """Create the text-to-speech pipeline from three model entries.

        Args:
            model: Exactly three entries. ``model[0]`` is passed to
                ``TextToTacotronSymbols`` when no preprocessor is supplied;
                ``model[1]`` and ``model[2]`` are forwarded to the base
                ``Pipeline`` and are afterwards exposed as the acoustic
                model (``self._am``) and the vocoder (``self._vocoder``).
            preprocessor: Optional ready-made preprocessor. When ``None``,
                a ``TextToTacotronSymbols`` is built from ``model[0]``.
            **kwargs: Forwarded unchanged to ``Pipeline.__init__``. The
                optional ``lang_type`` entry (default ``'pinyin'``) is also
                used for the default preprocessor.

        Raises:
            AssertionError: If ``model`` does not hold exactly three entries.
        """
        assert len(model) == 3, 'model number should be 3'
        if preprocessor is None:
            # kwargs is a plain dict, so the option must be read by key;
            # attribute access (kwargs.lang_type) raises AttributeError.
            lang_type = kwargs.get('lang_type', 'pinyin')
            preprocessor = TextToTacotronSymbols(model[0], lang_type=lang_type)
        # Only the last two entries go to the base class; the first one is
        # consumed by the preprocessor above.
        models = [model[1], model[2]]
        super().__init__(model=models, preprocessor=preprocessor, **kwargs)
        # Read back from self.models (populated by the base class) rather
        # than from the raw ``model`` argument.
        self._am = self.models[0]
        self._vocoder = self.models[1]

    def forward(self, inputs: Dict[str, Any]) -> Dict[str, np.ndarray]:
        texts = inputs['texts']
--- a/tests/pipelines/test_text_to_speech.py
+++ b/tests/pipelines/test_text_to_speech.py
@@ -1,6 +1,5 @@
 import unittest

 import tensorflow as tf
 # NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
 #         A segmentation fault may be raised by the pytorch cpp library
 #         if 'import tensorflow' comes before 'import torch'.
@@ -16,6 +15,8 @@ from modelscope.utils.constant import Fields, Tasks
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import test_level

 import tensorflow as tf  # isort:skip

 logger = get_logger()


@@ -23,33 +24,14 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):

    @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
    def test_pipeline(self):
        # End-to-end check: build the text-to-speech pipeline from model ids
        # only, synthesize one sentence and write the result to a wav file.
        text = '明天天气怎么样'
        preprocessor_model_id = 'damo/speech_binary_tts_frontend_resource'
        am_model_id = 'damo/speech_sambert16k_tts_zhitian_emo'
        voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo'
        # Order matters: frontend resource, acoustic model, vocoder.
        sambert_tts = pipeline(
            task=Tasks.text_to_speech,
            model=[preprocessor_model_id, am_model_id, voc_model_id])
        self.assertTrue(sambert_tts is not None)
        output = sambert_tts(text)
        self.assertTrue(len(output['output']) > 0)
        # NOTE(review): 16000 is presumably the sample rate expected by
        # ``write`` -- confirm against where ``write`` is imported.
        write('output.wav', 16000, output['output'])