<add> Chinese mandarin implementation (tacotron2)

This commit is contained in:
kirianguiller 2021-02-15 16:04:47 +01:00 committed by Eren Gölge
parent 49665783a6
commit 42ba30eb8f
3 changed files with 14 additions and 11 deletions

View File

@ -372,15 +372,10 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):
def baker(root_path: str, meta_file: str) -> List[List[str]]:
"""Normalizes the Baker meta data file to TTS format
Args:
root_path (str): path to the baker dataset
meta_file (str): name of the metadata file listing the wav file names to select and the transcript of each sentence
Returns:
List[List[str]]: List of [text, wav_path, speaker_name] items, one per sentence
"""
# ======================================== Baker (Chinese Mandarin single speaker) ===========================================
def baker(root_path, meta_file):
"""Normalizes the Baker meta data file to TTS format"""
txt_file = os.path.join(root_path, meta_file)
items = []
speaker_name = "baker"
@ -389,4 +384,4 @@ def baker(root_path: str, meta_file: str) -> List[List[str]]:
wav_name, text = line.rstrip('\n').split("|")
wav_path = os.path.join(root_path, "clips_22", wav_name)
items.append([text, wav_path, speaker_name])
return items
return items

View File

@ -1,5 +1,5 @@
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import pkg_resources
installed = {pkg.key for pkg in pkg_resources.working_set} #pylint: disable=not-an-iterable
if 'tensorflow' in installed or 'tensorflow-gpu' in installed:
@ -220,6 +220,7 @@ def synthesis(model,
model outputs.
speaker_id (int): id of speaker
style_wav (str | Dict[str, float]): Used for style embedding of GST.
style_wav (str): Used for style embedding of GST.
truncated (bool): keep model states after inference. It can be used
for continuous inference on long texts.
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.

View File

@ -122,6 +122,13 @@ class Synthesizer(object):
speaker_embedding = self.init_speaker(speaker_idx)
use_gl = self.vocoder_model is None
# check if compute gst style
gst_style_input = None
if self.tts_config.use_gst:
if self.tts_config.gst["gst_style_input"] not in ["", {}]:
style_wav = self.tts_config.gst["gst_style_input"]
for sen in sens:
# synthesize voice
waveform, _, _, mel_postnet_spec, _, _ = synthesis(
@ -131,7 +138,7 @@ class Synthesizer(object):
self.use_cuda,
self.ap,
speaker_idx,
None,
gst_style_input,
False,
self.tts_config.enable_eos_bos_chars,
use_gl,