From 0d12229b642ead3b56294899faf8001597ab6298 Mon Sep 17 00:00:00 2001
From: Dani Vera <28764301+dveni@users.noreply.github.com>
Date: Fri, 10 Mar 2023 18:35:16 +0100
Subject: [PATCH 1/4] vits: pass sampler and batch_size to DataLoader when num_gpus > 1

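This should fix https://github.com/coqui-ai/TTS/issues/1986 without breaking
batch data sampling: when num_gpus > 1 the sampler is passed to the DataLoader
as `sampler` together with an explicit batch size; otherwise it is still
passed as `batch_sampler`, as before.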
---
 TTS/tts/models/vits.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index 14c76add..78ff00c2 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -1628,13 +1628,23 @@ class Vits(BaseTTS):
                     pin_memory=False,
                 )
             else:
-                loader = DataLoader(
+                if num_gpus > 1:
+                    loader = DataLoader(
                     dataset,
-                    batch_sampler=sampler,
+                    sampler=sampler,
+                    batch_size=config.eval_batch_size if is_eval else config.batch_size,
                     collate_fn=dataset.collate_fn,
                     num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
                     pin_memory=False,
                 )
+                else:
+                    loader = DataLoader(
+                        dataset,
+                        batch_sampler=sampler,
+                        collate_fn=dataset.collate_fn,
+                        num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+                        pin_memory=False,
+                    )
         return loader
 
     def get_optimizer(self) -> List:

From dfb48737fbe40c341dff52b98c628db20257f8fb Mon Sep 17 00:00:00 2001
From: Daniel Vera Nieto <dveranieto@ethz.ch>
Date: Mon, 13 Mar 2023 16:11:15 +0100
Subject: [PATCH 2/4] vits: fix indentation of the DataLoader call in the num_gpus > 1 branch

---
 TTS/tts/models/vits.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py
index 78ff00c2..7500da61 100644
--- a/TTS/tts/models/vits.py
+++ b/TTS/tts/models/vits.py
@@ -1630,13 +1630,13 @@ class Vits(BaseTTS):
             else:
                 if num_gpus > 1:
                     loader = DataLoader(
-                    dataset,
-                    sampler=sampler,
-                    batch_size=config.eval_batch_size if is_eval else config.batch_size,
-                    collate_fn=dataset.collate_fn,
-                    num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
-                    pin_memory=False,
-                )
+                        dataset,
+                        sampler=sampler,
+                        batch_size=config.eval_batch_size if is_eval else config.batch_size,
+                        collate_fn=dataset.collate_fn,
+                        num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers,
+                        pin_memory=False,
+                    )
                 else:
                     loader = DataLoader(
                         dataset,

From 3c15f0619a3c87aca6ed99c614d930575ecb7ee1 Mon Sep 17 00:00:00 2001
From: Roee Shenberg <shenberg@gmail.com>
Date: Wed, 15 Mar 2023 12:02:11 +0100
Subject: [PATCH 3/4] Bug fixes in OverFlow audio generation (#2380)

---
 TTS/tts/models/neuralhmm_tts.py | 7 ++++---
 TTS/tts/models/overflow.py      | 7 ++++---
 TTS/utils/generic_utils.py      | 5 +++--
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py
index e4f88452..e2414108 100644
--- a/TTS/tts/models/neuralhmm_tts.py
+++ b/TTS/tts/models/neuralhmm_tts.py
@@ -179,6 +179,7 @@ class NeuralhmmTTS(BaseTTS):
         Args:
             aux_inputs (Dict): Dictionary containing the auxiliary inputs.
         """
+        default_input_dict = default_input_dict.copy()
         default_input_dict.update(
             {
                 "sampling_temp": self.sampling_temp,
@@ -187,8 +188,8 @@ class NeuralhmmTTS(BaseTTS):
             }
         )
         if aux_input:
-            return format_aux_input(aux_input, default_input_dict)
-        return None
+            return format_aux_input(default_input_dict, aux_input)
+        return default_input_dict
 
     @torch.no_grad()
     def inference(
@@ -319,7 +320,7 @@ class NeuralhmmTTS(BaseTTS):
         # sample one item from the batch -1 will give the smalles item
         print(" | > Synthesising audio from the model...")
         inference_output = self.inference(
-            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lenghts": batch["text_lengths"][-1].unsqueeze(0)}
+            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lengths": batch["text_lengths"][-1].unsqueeze(0)}
         )
         figures["synthesised"] = plot_spectrogram(inference_output["model_outputs"][0], fig_size=(12, 3))
 
diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py
index c2b5b7c2..92b3c767 100644
--- a/TTS/tts/models/overflow.py
+++ b/TTS/tts/models/overflow.py
@@ -192,6 +192,7 @@ class Overflow(BaseTTS):
         Args:
             aux_inputs (Dict): Dictionary containing the auxiliary inputs.
         """
+        default_input_dict = default_input_dict.copy()
         default_input_dict.update(
             {
                 "sampling_temp": self.sampling_temp,
@@ -200,8 +201,8 @@ class Overflow(BaseTTS):
             }
         )
         if aux_input:
-            return format_aux_input(aux_input, default_input_dict)
-        return None
+            return format_aux_input(default_input_dict, aux_input)
+        return default_input_dict
 
     @torch.no_grad()
     def inference(
@@ -335,7 +336,7 @@ class Overflow(BaseTTS):
         # sample one item from the batch -1 will give the smalles item
         print(" | > Synthesising audio from the model...")
         inference_output = self.inference(
-            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lenghts": batch["text_lengths"][-1].unsqueeze(0)}
+            batch["text_input"][-1].unsqueeze(0), aux_input={"x_lengths": batch["text_lengths"][-1].unsqueeze(0)}
         )
         figures["synthesised"] = plot_spectrogram(inference_output["model_outputs"][0], fig_size=(12, 3))
 
diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py
index b685210c..d5312f49 100644
--- a/TTS/utils/generic_utils.py
+++ b/TTS/utils/generic_utils.py
@@ -167,9 +167,10 @@ def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict:
     Returns:
         Dict: arguments with formatted auxilary inputs.
     """
+    kwargs = kwargs.copy()
     for name in def_args:
-        if name not in kwargs:
-            kwargs[def_args[name]] = None
+        if name not in kwargs or kwargs[name] is None:
+            kwargs[name] = def_args[name]
     return kwargs
 
 

From 2db262747ea84d1885648cb981add7e58637e30f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eren=20G=C3=B6lge?= <erogol@hotmail.com>
Date: Fri, 17 Mar 2023 13:21:03 +0100
Subject: [PATCH 4/4] Bump up to v0.12.0

---
 TTS/VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TTS/VERSION b/TTS/VERSION
index 027934ea..ac454c6a 100644
--- a/TTS/VERSION
+++ b/TTS/VERSION
@@ -1 +1 @@
-0.11.1
\ No newline at end of file
+0.12.0