mirror of https://github.com/coqui-ai/TTS.git
<add> Chinese mandarin implementation (tacotron2)
This commit is contained in:
parent
49665783a6
commit
42ba30eb8f
|
@ -372,15 +372,10 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):
|
|||
|
||||
|
||||
|
||||
def baker(root_path: str, meta_file: str) -> List[List[str]]:
|
||||
"""Normalizes the Baker meta data file to TTS format
|
||||
|
||||
Args:
|
||||
root_path (str): path to the baker dataset
|
||||
meta_file (str): name of the metadata file listing the wav files to select and the transcript of each sentence
|
||||
Returns:
|
||||
List[List[str]]: List of (text, wav_path, speaker_name) entries, one per sentence
|
||||
"""
|
||||
# ======================================== Baker (Chinese Mandarin, single speaker) ===========================================
|
||||
def baker(root_path, meta_file):
|
||||
"""Normalizes the Baker meta data file to TTS format"""
|
||||
txt_file = os.path.join(root_path, meta_file)
|
||||
items = []
|
||||
speaker_name = "baker"
|
||||
|
|
|
@ -220,6 +220,7 @@ def synthesis(model,
|
|||
model outputs.
|
||||
speaker_id (int): id of speaker
|
||||
style_wav (str | Dict[str, float]): Used for the style embedding of GST.
|
||||
style_wav (str): Used for the style embedding of GST.
|
||||
truncated (bool): keep model states after inference. It can be used
|
||||
for continuous inference at long texts.
|
||||
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.
|
||||
|
|
|
@ -122,6 +122,13 @@ class Synthesizer(object):
|
|||
speaker_embedding = self.init_speaker(speaker_idx)
|
||||
use_gl = self.vocoder_model is None
|
||||
|
||||
|
||||
# check if compute gst style
|
||||
gst_style_input = None
|
||||
if self.tts_config.use_gst:
|
||||
if self.tts_config.gst["gst_style_input"] not in ["", {}]:
|
||||
style_wav = self.tts_config.gst["gst_style_input"]
|
||||
|
||||
for sen in sens:
|
||||
# synthesize voice
|
||||
waveform, _, _, mel_postnet_spec, _, _ = synthesis(
|
||||
|
@ -131,7 +138,7 @@ class Synthesizer(object):
|
|||
self.use_cuda,
|
||||
self.ap,
|
||||
speaker_idx,
|
||||
None,
|
||||
gst_style_input,
|
||||
False,
|
||||
self.tts_config.enable_eos_bos_chars,
|
||||
use_gl,
|
||||
|
|
Loading…
Reference in New Issue