From 2d45cc73aefc202582eeb47755e64965c5460912 Mon Sep 17 00:00:00 2001 From: Enno Hermann <enno.hermann@idiap.ch> Date: Mon, 20 Nov 2023 16:07:30 +0100 Subject: [PATCH] fix: only compute spk embeddings for models that support it Fixes #1440. Passing a `speaker_wav` argument to regular Vits models failed because they don't support voice cloning. Now that argument is simply ignored. --- TTS/utils/synthesizer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 8efe608b..0d0eb78a 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -358,7 +358,11 @@ class Synthesizer(nn.Module): ) # compute a new d_vector from the given clip. - if speaker_wav is not None and self.tts_model.speaker_manager is not None: + if ( + speaker_wav is not None + and self.tts_model.speaker_manager is not None + and self.tts_model.speaker_manager.encoder_ap is not None + ): speaker_embedding = self.tts_model.speaker_manager.compute_embedding_from_clip(speaker_wav) vocoder_device = "cpu"