From 260ffd77561e78959fc99d449da5f94cb21f6c61 Mon Sep 17 00:00:00 2001 From: Edresson Casanova Date: Mon, 28 Mar 2022 20:01:51 +0000 Subject: [PATCH] Fix the bug in inference --- TTS/tts/models/vits.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index d212ea12..e09c3667 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1516,6 +1516,11 @@ class Vits(BaseTTS): # TODO: consider baking the speaker encoder into the model and call it from there. # as it is probably easier for model distribution. state["model"] = {k: v for k, v in state["model"].items() if "speaker_encoder" not in k} + + if self.args.TTS_part_sample_rate is not None and eval: + # audio resampler is not used at inference time + self.audio_resampler = None + # handle fine-tuning from a checkpoint with additional speakers if hasattr(self, "emb_g") and state["model"]["emb_g.weight"].shape != self.emb_g.weight.shape: num_new_speakers = self.emb_g.weight.shape[0] - state["model"]["emb_g.weight"].shape[0]