From 662ee2ba6341fba737453005c7773ff3b0fd0154 Mon Sep 17 00:00:00 2001
From: Edresson Casanova
Date: Mon, 6 Nov 2023 09:15:16 -0300
Subject: [PATCH] Bug fix on XTTS get_gpt_cond_latents

---
 TTS/tts/models/xtts.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py
index ecb31a9a..cb0aff75 100644
--- a/TTS/tts/models/xtts.py
+++ b/TTS/tts/models/xtts.py
@@ -378,8 +378,8 @@ class Xtts(BaseTTS):
             length (int): Length of the audio in seconds. Defaults to 3.
         """
         if sr != 22050:
-            audio_22k = torchaudio.functional.resample(audio, sr, 22050)
-        audio_22k = audio_22k[:, : 22050 * length]
+            audio = torchaudio.functional.resample(audio, sr, 22050)
+        audio = audio[:, : 22050 * length]
         if self.args.gpt_use_perceiver_resampler:
             n_fft = 2048
             hop_length = 256
@@ -389,7 +389,7 @@ class Xtts(BaseTTS):
             hop_length = 1024
             win_length = 4096
         mel = wav_to_mel_cloning(
-            audio_22k,
+            audio,
             mel_norms=self.mel_stats.cpu(),
             n_fft=n_fft,
             hop_length=hop_length,