mirror of https://github.com/coqui-ai/TTS.git
Fix phonemes
This commit is contained in:
parent
846bf16f02
commit
120332d53f
|
@ -7,7 +7,7 @@ from tqdm.contrib.concurrent import process_map
|
|||
|
||||
from TTS.config import load_config
|
||||
from TTS.tts.datasets import load_tts_samples
|
||||
from TTS.tts.utils.text import text2phone
|
||||
from TTS.tts.utils.text import text2phone, phoneme_to_sequence
|
||||
|
||||
|
||||
def compute_phonemes(item):
|
||||
|
|
|
@ -273,7 +273,7 @@ class TTSDataset(Dataset):
|
|||
item = args[0]
|
||||
func_args = args[1]
|
||||
text, wav_file, *_ = item
|
||||
func_args[3] = item[4]
|
||||
func_args[3] = item[3]
|
||||
phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args)
|
||||
return phonemes
|
||||
|
||||
|
|
|
@ -543,6 +543,7 @@ class Vits(BaseTTS):
|
|||
"style_wav": style_wav,
|
||||
"d_vector": d_vector,
|
||||
"language_id": language_id,
|
||||
"language_name": language_name,
|
||||
}
|
||||
|
||||
def forward(
|
||||
|
@ -1061,6 +1062,7 @@ class Vits(BaseTTS):
|
|||
d_vector=aux_inputs["d_vector"],
|
||||
style_wav=aux_inputs["style_wav"],
|
||||
language_id=aux_inputs["language_id"],
|
||||
language_name=aux_inputs["language_name"],
|
||||
enable_eos_bos_chars=self.config.enable_eos_bos_chars,
|
||||
use_griffin_lim=True,
|
||||
do_trim_silence=False,
|
||||
|
|
|
@ -15,7 +15,7 @@ if "tensorflow" in installed or "tensorflow-gpu" in installed:
|
|||
import tensorflow as tf
|
||||
|
||||
|
||||
def text_to_seq(text, CONFIG, custom_symbols=None):
|
||||
def text_to_seq(text, CONFIG, custom_symbols=None, language=None):
|
||||
text_cleaner = [CONFIG.text_cleaner]
|
||||
# text or phonemes to sequence vector
|
||||
if CONFIG.use_phonemes:
|
||||
|
@ -23,7 +23,7 @@ def text_to_seq(text, CONFIG, custom_symbols=None):
|
|||
phoneme_to_sequence(
|
||||
text,
|
||||
text_cleaner,
|
||||
CONFIG.phoneme_language,
|
||||
language if language else CONFIG.phoneme_language,
|
||||
CONFIG.enable_eos_bos_chars,
|
||||
tp=CONFIG.characters,
|
||||
add_blank=CONFIG.add_blank,
|
||||
|
@ -212,6 +212,7 @@ def synthesis(
|
|||
do_trim_silence=False,
|
||||
d_vector=None,
|
||||
language_id=None,
|
||||
language_name=None,
|
||||
backend="torch",
|
||||
):
|
||||
"""Synthesize voice for the given text using Griffin-Lim vocoder or just compute output features to be passed to
|
||||
|
@ -262,7 +263,7 @@ def synthesis(
|
|||
if hasattr(model, "make_symbols"):
|
||||
custom_symbols = model.make_symbols(CONFIG)
|
||||
# preprocess the given text
|
||||
text_inputs = text_to_seq(text, CONFIG, custom_symbols=custom_symbols)
|
||||
text_inputs = text_to_seq(text, CONFIG, custom_symbols=custom_symbols, language=language_name)
|
||||
# pass tensors to backend
|
||||
if backend == "torch":
|
||||
if speaker_id is not None:
|
||||
|
|
Loading…
Reference in New Issue