jiaqi.sjq 3 years ago
parent
commit
9b7e68b67e
2 changed files with 17 additions and 34 deletions
  1. +14
    -13
      modelscope/pipelines/audio/text_to_speech_pipeline.py
  2. +3
    -21
      tests/pipelines/test_text_to_speech.py

+ 14
- 13
modelscope/pipelines/audio/text_to_speech_pipeline.py View File

@@ -9,7 +9,8 @@ from modelscope.models.audio.tts.am import SambertNetHifi16k
from modelscope.models.audio.tts.vocoder import Hifigan16k from modelscope.models.audio.tts.vocoder import Hifigan16k
from modelscope.pipelines.base import Pipeline from modelscope.pipelines.base import Pipeline
from modelscope.pipelines.builder import PIPELINES from modelscope.pipelines.builder import PIPELINES
from modelscope.preprocessors import TextToTacotronSymbols, build_preprocessor
from modelscope.preprocessors import (Preprocessor, TextToTacotronSymbols,
build_preprocessor)
from modelscope.utils.constant import Fields, Tasks from modelscope.utils.constant import Fields, Tasks


__all__ = ['TextToSpeechSambertHifigan16kPipeline'] __all__ = ['TextToSpeechSambertHifigan16kPipeline']
@@ -20,19 +21,19 @@ __all__ = ['TextToSpeechSambertHifigan16kPipeline']
class TextToSpeechSambertHifigan16kPipeline(Pipeline): class TextToSpeechSambertHifigan16kPipeline(Pipeline):


def __init__(self, def __init__(self,
config_file: str = None,
model: List[Model] = None,
preprocessor: TextToTacotronSymbols = None,
model: List[str] = None,
preprocessor: Preprocessor = None,
**kwargs): **kwargs):
super().__init__(
config_file=config_file,
model=model,
preprocessor=preprocessor,
**kwargs)
assert len(model) == 2, 'model number should be 2'
self._am = model[0]
self._vocoder = model[1]
self._preprocessor = preprocessor
assert len(model) == 3, 'model number should be 3'
if preprocessor is None:
lang_type = 'pinyin'
if 'lang_type' in kwargs:
lang_type = kwargs.lang_type
preprocessor = TextToTacotronSymbols(model[0], lang_type=lang_type)
models = [model[1], model[2]]
super().__init__(model=models, preprocessor=preprocessor, **kwargs)
self._am = self.models[0]
self._vocoder = self.models[1]


def forward(self, inputs: Dict[str, Any]) -> Dict[str, np.ndarray]: def forward(self, inputs: Dict[str, Any]) -> Dict[str, np.ndarray]:
texts = inputs['texts'] texts = inputs['texts']


+ 3
- 21
tests/pipelines/test_text_to_speech.py View File

@@ -1,6 +1,5 @@
import unittest import unittest


import tensorflow as tf
# NOTICE: Tensorflow 1.15 does not seem to be fully compatible with pytorch. # NOTICE: Tensorflow 1.15 does not seem to be fully compatible with pytorch.
# A segmentation fault may be raised by the pytorch cpp library # A segmentation fault may be raised by the pytorch cpp library
# if 'import tensorflow' comes before 'import torch'. # if 'import tensorflow' comes before 'import torch'.
@@ -16,6 +15,8 @@ from modelscope.utils.constant import Fields, Tasks
from modelscope.utils.logger import get_logger from modelscope.utils.logger import get_logger
from modelscope.utils.test_utils import test_level from modelscope.utils.test_utils import test_level


import tensorflow as tf # isort:skip

logger = get_logger() logger = get_logger()




@@ -23,33 +24,14 @@ class TextToSpeechSambertHifigan16kPipelineTest(unittest.TestCase):


@unittest.skipUnless(test_level() >= 1, 'skip test in current test level') @unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_pipeline(self): def test_pipeline(self):
lang_type = 'pinyin'
text = '明天天气怎么样' text = '明天天气怎么样'
preprocessor_model_id = 'damo/speech_binary_tts_frontend_resource' preprocessor_model_id = 'damo/speech_binary_tts_frontend_resource'
am_model_id = 'damo/speech_sambert16k_tts_zhitian_emo' am_model_id = 'damo/speech_sambert16k_tts_zhitian_emo'
voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo' voc_model_id = 'damo/speech_hifigan16k_tts_zhitian_emo'

cfg_preprocessor = dict(
type=Preprocessors.text_to_tacotron_symbols,
model_name=preprocessor_model_id,
lang_type=lang_type)
preprocessor = build_preprocessor(cfg_preprocessor, Fields.audio)
self.assertTrue(preprocessor is not None)

am = Model.from_pretrained(am_model_id)
self.assertTrue(am is not None)

voc = Model.from_pretrained(voc_model_id)
self.assertTrue(voc is not None)

sambert_tts = pipeline( sambert_tts = pipeline(
task=Tasks.text_to_speech, task=Tasks.text_to_speech,
pipeline_name=Pipelines.sambert_hifigan_16k_tts,
config_file='',
model=[am, voc],
preprocessor=preprocessor)
model=[preprocessor_model_id, am_model_id, voc_model_id])
self.assertTrue(sambert_tts is not None) self.assertTrue(sambert_tts is not None)

output = sambert_tts(text) output = sambert_tts(text)
self.assertTrue(len(output['output']) > 0) self.assertTrue(len(output['output']) > 0)
write('output.wav', 16000, output['output']) write('output.wav', 16000, output['output'])


Loading…
Cancel
Save