mirror of https://github.com/coqui-ai/TTS.git
parent
d63a6bb690
commit
8fffd4e813
|
@ -244,6 +244,7 @@ class TTSDataset(Dataset):
|
||||||
# TODO: find a better fix
|
# TODO: find a better fix
|
||||||
return self.load_data(self.rescue_item_idx)
|
return self.load_data(self.rescue_item_idx)
|
||||||
|
|
||||||
|
pitch = None
|
||||||
if self.compute_f0:
|
if self.compute_f0:
|
||||||
pitch = self._load_or_compute_pitch(self.ap, wav_file, self.f0_cache_path)
|
pitch = self._load_or_compute_pitch(self.ap, wav_file, self.f0_cache_path)
|
||||||
|
|
||||||
|
|
|
@ -687,7 +687,7 @@ class FastPitchLoss(nn.Module):
|
||||||
|
|
||||||
spec_loss = self.spec_loss(decoder_output, decoder_target, decoder_output_lens)
|
spec_loss = self.spec_loss(decoder_output, decoder_target, decoder_output_lens)
|
||||||
ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens)
|
ssim_loss = self.ssim(decoder_output, decoder_target, decoder_output_lens)
|
||||||
dur_loss = self.dur_loss(dur_output[:, : ,None], dur_target[:, :, None], input_lens)
|
dur_loss = self.dur_loss(dur_output[:, :, None], dur_target[:, :, None], input_lens)
|
||||||
pitch_loss = self.pitch_loss(pitch_output.transpose(1, 2), pitch_target.transpose(1, 2), input_lens)
|
pitch_loss = self.pitch_loss(pitch_output.transpose(1, 2), pitch_target.transpose(1, 2), input_lens)
|
||||||
loss = (
|
loss = (
|
||||||
self.spec_loss_alpha * spec_loss
|
self.spec_loss_alpha * spec_loss
|
||||||
|
|
|
@ -45,12 +45,10 @@ def text2phone(text, language, use_espeak_phonemes=False):
|
||||||
# TO REVIEW : How to have a good implementation for this?
|
# TO REVIEW : How to have a good implementation for this?
|
||||||
if language == "zh-CN":
|
if language == "zh-CN":
|
||||||
ph = chinese_text_to_phonemes(text)
|
ph = chinese_text_to_phonemes(text)
|
||||||
print(" > Phonemes: {}".format(ph))
|
|
||||||
return ph
|
return ph
|
||||||
|
|
||||||
if language == "ja-jp":
|
if language == "ja-jp":
|
||||||
ph = japanese_text_to_phonemes(text)
|
ph = japanese_text_to_phonemes(text)
|
||||||
print(" > Phonemes: {}".format(ph))
|
|
||||||
return ph
|
return ph
|
||||||
|
|
||||||
if gruut.is_language_supported(language):
|
if gruut.is_language_supported(language):
|
||||||
|
@ -80,7 +78,6 @@ def text2phone(text, language, use_espeak_phonemes=False):
|
||||||
|
|
||||||
# Fix a few phonemes
|
# Fix a few phonemes
|
||||||
ph = ph.translate(GRUUT_TRANS_TABLE)
|
ph = ph.translate(GRUUT_TRANS_TABLE)
|
||||||
|
|
||||||
return ph
|
return ph
|
||||||
|
|
||||||
raise ValueError(f" [!] Language {language} is not supported for phonemization.")
|
raise ValueError(f" [!] Language {language} is not supported for phonemization.")
|
||||||
|
|
Loading…
Reference in New Issue