From 2d45cc73aefc202582eeb47755e64965c5460912 Mon Sep 17 00:00:00 2001
From: Enno Hermann <enno.hermann@idiap.ch>
Date: Mon, 20 Nov 2023 16:07:30 +0100
Subject: [PATCH] fix: only compute spk embeddings for models that support it

Fixes #1440. Passing a `speaker_wav` argument to regular Vits models failed
because they don't support voice cloning. The argument is now simply ignored
for such models.
---
 TTS/utils/synthesizer.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
index 8efe608b..0d0eb78a 100644
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@@ -358,7 +358,11 @@ class Synthesizer(nn.Module):
                 )
 
         # compute a new d_vector from the given clip.
-        if speaker_wav is not None and self.tts_model.speaker_manager is not None:
+        if (
+            speaker_wav is not None
+            and self.tts_model.speaker_manager is not None
+            and self.tts_model.speaker_manager.encoder_ap is not None
+        ):
             speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav)
 
         vocoder_device = "cpu"