mirror of https://github.com/coqui-ai/TTS.git
Prevent resampling
This commit is contained in:
parent
2d65f005a2
commit
9b5c295fcf
|
@@ -377,8 +377,8 @@ class Xtts(BaseTTS):
|
||||||
sr (int): Sample rate of the audio.
|
sr (int): Sample rate of the audio.
|
||||||
length (int): Length of the audio in seconds. Defaults to 3.
|
length (int): Length of the audio in seconds. Defaults to 3.
|
||||||
"""
|
"""
|
||||||
|
if sr != 22050:
|
||||||
audio_22k = torchaudio.functional.resample(audio, sr, 22050)
|
audio_22k = torchaudio.functional.resample(audio, sr, 22050)
|
||||||
audio_22k = audio_22k[:, : 22050 * length]
|
audio_22k = audio_22k[:, : 22050 * length]
|
||||||
if self.args.gpt_use_perceiver_resampler:
|
if self.args.gpt_use_perceiver_resampler:
|
||||||
mel = wav_to_mel_cloning(audio_22k,
|
mel = wav_to_mel_cloning(audio_22k,
|
||||||
|
@@ -600,6 +600,7 @@ class Xtts(BaseTTS):
|
||||||
(gpt_cond_latent, diffusion_conditioning, speaker_embedding) = self.get_conditioning_latents(
|
(gpt_cond_latent, diffusion_conditioning, speaker_embedding) = self.get_conditioning_latents(
|
||||||
audio_path=ref_audio_path, gpt_cond_len=gpt_cond_len, max_ref_length=max_ref_len, sound_norm_refs=sound_norm_refs
|
audio_path=ref_audio_path, gpt_cond_len=gpt_cond_len, max_ref_length=max_ref_len, sound_norm_refs=sound_norm_refs
|
||||||
)
|
)
|
||||||
|
|
||||||
return self.inference(
|
return self.inference(
|
||||||
text,
|
text,
|
||||||
language,
|
language,
|
||||||
|
|
Loading…
Reference in New Issue