diff --git a/README.md b/README.md
index 18d1469c..577eb3e9 100644
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ Underlined "TTS*" and "Judy*" are 🐸TTS models
 - Speedy-Speech: [paper](https://arxiv.org/abs/2008.03802)
 - Align-TTS: [paper](https://arxiv.org/abs/2003.01950)
 - FastPitch: [paper](https://arxiv.org/pdf/2006.06873.pdf)
+- FastSpeech: [paper](https://arxiv.org/abs/1905.09263)
 
 ### End-to-End Models
 - VITS: [paper](https://arxiv.org/pdf/2106.06103)
diff --git a/TTS/tts/configs/speedy_speech_config.py b/TTS/tts/configs/speedy_speech_config.py
index 23a96ff1..7c9bb8e4 100644
--- a/TTS/tts/configs/speedy_speech_config.py
+++ b/TTS/tts/configs/speedy_speech_config.py
@@ -144,8 +144,8 @@ class SpeedySpeechConfig(BaseTTSConfig):
     dur_loss_alpha: float = 1.0
     spec_loss_alpha: float = 1.0
     aligner_loss_alpha: float = 1.0
-    binary_align_loss_alpha: float = 1.0
-    binary_align_loss_start_step: int = 20000
+    binary_align_loss_alpha: float = 0.3
+    binary_align_loss_start_step: int = 50000
 
     # overrides
     min_seq_len: int = 13