mirror of https://github.com/coqui-ai/TTS.git
Update vits dataset
This commit is contained in:
parent
a84499c5da
commit
942df0fb05
|
@ -196,14 +196,12 @@ class VitsDataset(TTSDataset):
|
||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
item = self.samples[idx]
|
item = self.samples[idx]
|
||||||
|
raw_text = item["text"]
|
||||||
|
|
||||||
text, wav_file, speaker_name, language_name, _ = _parse_sample(item)
|
wav, _ = load_audio(item["audio_file"])
|
||||||
raw_text = text
|
wav_filename = os.path.basename(item["audio_file"])
|
||||||
|
|
||||||
wav, _ = load_audio(wav_file)
|
token_ids = self.get_token_ids(idx, item["text"])
|
||||||
wav_filename = os.path.basename(wav_file)
|
|
||||||
|
|
||||||
token_ids = self.get_token_ids(idx, text)
|
|
||||||
|
|
||||||
# after phonemization the text length may change
|
# after phonemization the text length may change
|
||||||
# this is a shameful 🤭 hack to prevent longer phonemes
|
# this is a shameful 🤭 hack to prevent longer phonemes
|
||||||
|
@ -218,8 +216,8 @@ class VitsDataset(TTSDataset):
|
||||||
"token_len": len(token_ids),
|
"token_len": len(token_ids),
|
||||||
"wav": wav,
|
"wav": wav,
|
||||||
"wav_file": wav_filename,
|
"wav_file": wav_filename,
|
||||||
"speaker_name": speaker_name,
|
"speaker_name": item["speaker_name"],
|
||||||
"language_name": language_name,
|
"language_name": item["language"],
|
||||||
}
|
}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -126,26 +126,6 @@ class Synthesizer(object):
|
||||||
self.encoder_checkpoint = self.tts_config.model_args.speaker_encoder_model_path
|
self.encoder_checkpoint = self.tts_config.model_args.speaker_encoder_model_path
|
||||||
self.encoder_config = self.tts_config.model_args.speaker_encoder_config_path
|
self.encoder_config = self.tts_config.model_args.speaker_encoder_config_path
|
||||||
|
|
||||||
def _is_use_speaker_embedding(self):
|
|
||||||
"""Check if the speaker embedding is used in the model"""
|
|
||||||
# we handle here the case that some models use model_args some don't
|
|
||||||
use_speaker_embedding = False
|
|
||||||
if hasattr(self.tts_config, "model_args"):
|
|
||||||
use_speaker_embedding = self.tts_config["model_args"].get("use_speaker_embedding", False)
|
|
||||||
use_speaker_embedding = use_speaker_embedding or self.tts_config.get("use_speaker_embedding", False)
|
|
||||||
return use_speaker_embedding
|
|
||||||
|
|
||||||
def _is_use_d_vector_file(self):
|
|
||||||
"""Check if the d-vector file is used in the model"""
|
|
||||||
# we handle here the case that some models use model_args some don't
|
|
||||||
use_d_vector_file = False
|
|
||||||
if hasattr(self.tts_config, "model_args"):
|
|
||||||
config = self.tts_config.model_args
|
|
||||||
use_d_vector_file = config.get("use_d_vector_file", False)
|
|
||||||
config = self.tts_config
|
|
||||||
use_d_vector_file = use_d_vector_file or config.get("use_d_vector_file", False)
|
|
||||||
return use_d_vector_file
|
|
||||||
|
|
||||||
def _load_vocoder(self, model_file: str, model_config: str, use_cuda: bool) -> None:
|
def _load_vocoder(self, model_file: str, model_config: str, use_cuda: bool) -> None:
|
||||||
"""Load the vocoder model.
|
"""Load the vocoder model.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue