Use load_audio in get_speaker_embedding

This commit is contained in:
WeberJulian 2023-09-28 10:52:50 -03:00
parent abfc738981
commit a0f657c764
1 changed file with 2 additions and 7 deletions

View File

@@ -419,14 +419,9 @@ class Xtts(BaseTTS):
self, self,
audio_path audio_path
): ):
wav, sr = torchaudio.load(audio_path) audio = load_audio(audio_path, self.hifigan_decoder.speaker_encoder_audio_config["sample_rate"])
spk_waveform = torchaudio.functional.resample(
wav,
22050,
self.hifigan_decoder.speaker_encoder_audio_config["sample_rate"],
).to(self.device)
speaker_embedding = self.hifigan_decoder.speaker_encoder.forward( speaker_embedding = self.hifigan_decoder.speaker_encoder.forward(
spk_waveform, l2_norm=True audio.to(self.device), l2_norm=True
).unsqueeze(-1).to(self.device) ).unsqueeze(-1).to(self.device)
return speaker_embedding return speaker_embedding