mirror of https://github.com/coqui-ai/TTS.git
<add> Chinese mandarin implementation (tacotron2)
This commit is contained in:
parent
49665783a6
commit
42ba30eb8f
|
@ -372,15 +372,10 @@ def _voxcel_x(root_path, meta_file, voxcel_idx):
|
|||
|
||||
|
||||
|
||||
def baker(root_path: str, meta_file: str) -> List[List[str]]:
|
||||
"""Normalizes the Baker meta data file to TTS format
|
||||
|
||||
Args:
|
||||
root_path (str): path to the baker dataset
|
||||
meta_file (str): name of the metadata file listing the wav file names to select and the transcript of each sentence
|
||||
Returns:
|
||||
List[List[str]]: List of (text, wav_path, speaker_name) associated with each sentence
|
||||
"""
|
||||
# ======================================== Baker (chinese mandarin single speaker) ===========================================
|
||||
def baker(root_path, meta_file):
|
||||
"""Normalizes the Baker meta data file to TTS format"""
|
||||
txt_file = os.path.join(root_path, meta_file)
|
||||
items = []
|
||||
speaker_name = "baker"
|
||||
|
@ -389,4 +384,4 @@ def baker(root_path: str, meta_file: str) -> List[List[str]]:
|
|||
wav_name, text = line.rstrip('\n').split("|")
|
||||
wav_path = os.path.join(root_path, "clips_22", wav_name)
|
||||
items.append([text, wav_path, speaker_name])
|
||||
return items
|
||||
return items
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import os
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
|
||||
import pkg_resources
|
||||
installed = {pkg.key for pkg in pkg_resources.working_set} #pylint: disable=not-an-iterable
|
||||
if 'tensorflow' in installed or 'tensorflow-gpu' in installed:
|
||||
|
@ -220,6 +220,7 @@ def synthesis(model,
|
|||
model outputs.
|
||||
speaker_id (int): id of speaker
|
||||
style_wav (str | Dict[str, float]): Used for style embedding of GST.
|
||||
style_wav (str): Used for style embedding of GST.
|
||||
truncated (bool): keep model states after inference. It can be used
|
||||
for continuous inference at long texts.
|
||||
enable_eos_bos_chars (bool): enable special chars for end of sentence and start of sentence.
|
||||
|
|
|
@ -122,6 +122,13 @@ class Synthesizer(object):
|
|||
speaker_embedding = self.init_speaker(speaker_idx)
|
||||
use_gl = self.vocoder_model is None
|
||||
|
||||
|
||||
# check whether a GST style input should be computed
|
||||
gst_style_input = None
|
||||
if self.tts_config.use_gst:
|
||||
if self.tts_config.gst["gst_style_input"] not in ["", {}]:
|
||||
style_wav = self.tts_config.gst["gst_style_input"]
|
||||
|
||||
for sen in sens:
|
||||
# synthesize voice
|
||||
waveform, _, _, mel_postnet_spec, _, _ = synthesis(
|
||||
|
@ -131,7 +138,7 @@ class Synthesizer(object):
|
|||
self.use_cuda,
|
||||
self.ap,
|
||||
speaker_idx,
|
||||
None,
|
||||
gst_style_input,
|
||||
False,
|
||||
self.tts_config.enable_eos_bos_chars,
|
||||
use_gl,
|
||||
|
|
Loading…
Reference in New Issue