fix: only compute spk embeddings for models that support it

Fixes #1440. Passing a `speaker_wav` argument to regular Vits models failed because they don't support voice cloning. The argument is now simply ignored for models that cannot compute speaker embeddings.
2023-11-20 16:07:30 +01:00 · 2023-11-20 16:07:30 +01:00 · 2d45cc73ae
parent cde319874e
commit 2d45cc73ae
1 changed file with 5 additions and 1 deletion
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@@ -358,7 +358,11 @@ class Synthesizer(nn.Module):
                )

        # compute a new d_vector from the given clip.
-        if speaker_wav is not None and self.tts_model.speaker_manager is not None:
+        if (
+            speaker_wav is not None
+            and self.tts_model.speaker_manager is not None
+            and self.tts_model.speaker_manager.encoder_ap is not None
+        ):
            speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav)

        vocoder_device = "cpu"