From a63998c04826b93567f528b8cfa72940c51e3c93 Mon Sep 17 00:00:00 2001
From: WeberJulian
Date: Sat, 1 Jan 2022 21:08:13 +0100
Subject: [PATCH 1/2] Fix phoneme language

---
 TTS/tts/datasets/dataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py
index 843cea58..4f8a6e17 100644
--- a/TTS/tts/datasets/dataset.py
+++ b/TTS/tts/datasets/dataset.py
@@ -273,7 +273,9 @@ class TTSDataset(Dataset):
         item = args[0]
         func_args = args[1]
         text, wav_file, *_ = item
-        func_args[3] = item[3]
+        if item[3] is not None and item[3] != '':
+            # If language is specified in dataset, overwrite phoneme_language key
+            func_args[3] = item[3]
         phonemes = TTSDataset._load_or_generate_phoneme_sequence(wav_file, text, *func_args)
         return phonemes
 

From 9d0a732f97c6394db94e177949541c6782390eae Mon Sep 17 00:00:00 2001
From: WeberJulian
Date: Sat, 1 Jan 2022 21:22:43 +0100
Subject: [PATCH 2/2] Fix VITS multispeaker recipe

---
 recipes/vctk/vits/train_vits.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/recipes/vctk/vits/train_vits.py b/recipes/vctk/vits/train_vits.py
index 19074ce3..a7ec2eae 100644
--- a/recipes/vctk/vits/train_vits.py
+++ b/recipes/vctk/vits/train_vits.py
@@ -5,7 +5,7 @@ from TTS.trainer import Trainer, TrainingArgs
 from TTS.tts.configs.shared_configs import BaseDatasetConfig
 from TTS.tts.configs.vits_config import VitsConfig
 from TTS.tts.datasets import load_tts_samples
-from TTS.tts.models.vits import Vits
+from TTS.tts.models.vits import Vits, VitsArgs
 from TTS.tts.utils.speakers import SpeakerManager
 from TTS.utils.audio import AudioProcessor
 
@@ -31,10 +31,14 @@ audio_config = BaseAudioConfig(
     resample=True,
 )
 
+vitsArgs = VitsArgs(
+    use_speaker_embedding=True,
+)
+
 config = VitsConfig(
+    model_args=vitsArgs,
     audio=audio_config,
     run_name="vits_vctk",
-    use_speaker_embedding=True,
     batch_size=32,
     eval_batch_size=16,
     batch_group_size=5,