mirror of https://github.com/coqui-ai/TTS.git
Use load_audio in get_speaker_embedding
This commit is contained in:
parent
abfc738981
commit
a0f657c764
|
@ -419,14 +419,9 @@ class Xtts(BaseTTS):
|
||||||
self,
|
self,
|
||||||
audio_path
|
audio_path
|
||||||
):
|
):
|
||||||
wav, sr = torchaudio.load(audio_path)
|
audio = load_audio(audio_path, self.hifigan_decoder.speaker_encoder_audio_config["sample_rate"])
|
||||||
spk_waveform = torchaudio.functional.resample(
|
|
||||||
wav,
|
|
||||||
22050,
|
|
||||||
self.hifigan_decoder.speaker_encoder_audio_config["sample_rate"],
|
|
||||||
).to(self.device)
|
|
||||||
speaker_embedding = self.hifigan_decoder.speaker_encoder.forward(
|
speaker_embedding = self.hifigan_decoder.speaker_encoder.forward(
|
||||||
spk_waveform, l2_norm=True
|
audio.to(self.device), l2_norm=True
|
||||||
).unsqueeze(-1).to(self.device)
|
).unsqueeze(-1).to(self.device)
|
||||||
return speaker_embedding
|
return speaker_embedding
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue