diff --git a/modelscope/pipelines/audio/text_to_speech_pipeline.py b/modelscope/pipelines/audio/text_to_speech_pipeline.py
index 22586d3e..8ac92118 100644
--- a/modelscope/pipelines/audio/text_to_speech_pipeline.py
+++ b/modelscope/pipelines/audio/text_to_speech_pipeline.py
@@ -9,7 +9,8 @@ from modelscope.models.audio.tts.am import SambertNetHifi16k
 from modelscope.models.audio.tts.vocoder import Hifigan16k
 from modelscope.pipelines.base import Pipeline
 from modelscope.pipelines.builder import PIPELINES
-from modelscope.preprocessors import TextToTacotronSymbols, build_preprocessor
+from modelscope.preprocessors import (Preprocessor, TextToTacotronSymbols,
+                                      build_preprocessor)
 from modelscope.utils.constant import Fields, Tasks
 
 __all__ = ['TextToSpeechSambertHifigan16kPipeline']
@@ -20,19 +21,19 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']
 class TextToSpeechSambertHifigan16kPipeline(Pipeline):
 
     def __init__(self,
-                 config_file: str = None,
-                 model: List[Model] = None,
-                 preprocessor: TextToTacotronSymbols = None,
+                 model: List[str] = None,
+                 preprocessor: Preprocessor = None,
                  **kwargs):
-        super().__init__(
-            config_file=config_file,
-            model=model,
-            preprocessor=preprocessor,
-            **kwargs)
-        assert len(model) == 2, 'model number should be 2'
-        self._am = model[0]
-        self._vocoder = model[1]
-        self._preprocessor = preprocessor
+        assert len(model) == 3, 'model number should be 3'
+        if preprocessor is None:
+            lang_type = 'pinyin'
+            if 'lang_type' in kwargs:
+                lang_type = kwargs['lang_type']
+            preprocessor = TextToTacotronSymbols(model[0], lang_type=lang_type)
+        models = [model[1], model[2]]
+        super().__init__(model=models, preprocessor=preprocessor, **kwargs)
+        self._am = self.models[0]
+        self._vocoder = self.models[1]
 
     def forward(self, inputs: Dict[str, Any]) -> Dict[str, np.ndarray]:
         texts = inputs['texts']
diff --git a/tests/pipelines/test_text_to_speech.py b/tests/pipelines/test_text_to_speech.py
index c371d80a..c445f46f 100644
--- a/tests/pipelines/test_text_to_speech.py
+++ b/tests/pipelines/test_text_to_speech.py
@@ -1,6 +1,5 @@
 import unittest
 
-import tensorflow as tf
 # NOTICE: Tensorflow 1.15 seems not so compatible with pytorch.
 # A segmentation fault may be raise by pytorch cpp library
 # if 'import tensorflow' in front of 'import torch'.
@@ -16,6 +15,8 @@ from modelscope.utils.constant import Fields, Tasks
 from modelscope.utils.logger import get_logger
 from modelscope.utils.test_utils import test_level
 
+import tensorflow as tf  # isort:skip
+
 logger = get_logger()
 
 
@@ -23,33 +24,14 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):
 
     @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
     def test_pipeline(self):
-        lang_type = 'pinyin'
         text = '明天天气怎么样'
         preprocessor_model_id = 'damo/speech_binary_tts_frontend_resource'
         am_model_id = 'damo/speech_sambert16k_tts_zhitian_emo'
         voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo'
-
-        cfg_preprocessor = dict(
-            type=Preprocessors.text_to_tacotron_symbols,
-            model_name=preprocessor_model_id,
-            lang_type=lang_type)
-        preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio)
-        self.assertTrue(preprocessor is not None)
-
-        am = Model.from_pretrained(am_model_id)
-        self.assertTrue(am is not None)
-
-        voc = Model.from_pretrained(voc_model_id)
-        self.assertTrue(voc is not None)
-
         sambert_tts = pipeline(
             task=Tasks.text_to_speech,
-            pipeline_name=Pipelines.sambert_hifigan_16k_tts,
-            config_file='',
-            model=[am, voc],
-            preprocessor=preprocessor)
+            model=[preprocessor_model_id, am_model_id, voc_model_id])
         self.assertTrue(sambert_tts is not None)
-
         output = sambert_tts(text)
         self.assertTrue(len(output['output']) > 0)
         write('output.wav', 16000, output['output'])
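Usage sketch (not part of the diff): after this change, callers no longer construct the preprocessor, acoustic model and vocoder themselves; the pipeline takes three ModelScope model IDs (text frontend resource, acoustic model, vocoder) and builds the TextToTacotronSymbols preprocessor internally. The snippet below mirrors the updated test; the scipy.io.wavfile import is an assumption inferred from the write() call signature used in the test.

from scipy.io.wavfile import write  # assumed source of write(), per the test's call

from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks

# Order matters: [text frontend resource, SAMBERT acoustic model, HiFi-GAN 16k vocoder].
# lang_type defaults to 'pinyin'; pass lang_type=... as a keyword argument to override.
sambert_tts = pipeline(
    task=Tasks.text_to_speech,
    model=[
        'damo/speech_binary_tts_frontend_resource',
        'damo/speech_sambert16k_tts_zhitian_emo',
        'damo/speech_hifigan16k_tts_zhitian_emo',
    ])

output = sambert_tts('明天天气怎么样')
write('output.wav', 16000, output['output'])  # 16 kHz waveform, as in the test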