From 9b5c295fcf8e1f2e28ccd0a49d6930595d2872d6 Mon Sep 17 00:00:00 2001 From: Eren Gölge Date: Sun, 5 Nov 2023 00:43:28 +0100 Subject: [PATCH] Prevent resampling --- TTS/tts/models/xtts.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 1fa349cf..f3bebf2c 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -377,8 +377,8 @@ class Xtts(BaseTTS): sr (int): Sample rate of the audio. length (int): Length of the audio in seconds. Defaults to 3. """ - - audio_22k = torchaudio.functional.resample(audio, sr, 22050) + if sr != 22050: + audio_22k = torchaudio.functional.resample(audio, sr, 22050) audio_22k = audio_22k[:, : 22050 * length] if self.args.gpt_use_perceiver_resampler: mel = wav_to_mel_cloning(audio_22k, @@ -600,6 +600,7 @@ class Xtts(BaseTTS): (gpt_cond_latent, diffusion_conditioning, speaker_embedding) = self.get_conditioning_latents( audio_path=ref_audio_path, gpt_cond_len=gpt_cond_len, max_ref_length=max_ref_len, sound_norm_refs=sound_norm_refs ) + return self.inference( text, language,