diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 208ec4d5..6b8cc591 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -272,6 +272,11 @@ class Xtts(BaseTTS): style_embs = [] for i in range(0, audio.shape[1], 22050 * chunk_length): audio_chunk = audio[:, i : i + 22050 * chunk_length] + + # if the chunk is too short ignore it + if audio_chunk.size(-1) < 22050 * 0.33: + continue + mel_chunk = wav_to_mel_cloning( audio_chunk, mel_norms=self.mel_stats.cpu(),