diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 831eddd5..89138e47 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,6 +6,7 @@ This repository is governed by [the Contributor Covenant Code of Conduct](https: ## Where to start. We welcome everyone who likes to contribute to 🐸TTS. + You can contribute not only with code but with bug reports, comments, questions, answers, or just a simple tweet to spread the word. If you like to contribute code, squash a bug but if you don't know where to start, here are some pointers. @@ -25,6 +26,16 @@ If you like to contribute code, squash a bug but if you don't know where to star We list all the target improvements for the next version. You can pick one of them and start contributing. - Also feel free to suggest new features, ideas and models. We're always open for new things. +##### Call for sharing language models +If possible, please consider sharing your pre-trained models in any language (if the licences allow you to do so). We will include them in our model catalogue for public use and give proper attribution, whether it be your name, company, website, or any other source you specify. + +You can share your models in two ways: +1. Share the model files with us, and we will serve them with the next 🐸 TTS release. +2. Upload your models to GDrive and share the link. + +Models are listed in the `.models.json` file, and every model is available through the TTS CLI or the server endpoints. + +Whichever way you choose, please make sure you share the models [here](https://github.com/coqui-ai/TTS/issues/380). ## Sending a ✨**PR**✨ If you have a new feature, a model to implement, or a bug to squash, go ahead and send a ✨**PR**✨. diff --git a/TTS/.models.json b/TTS/.models.json index 73204db6..3c2ad8dc 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -132,7 +132,7 @@ "thorsten":{ "tacotron2-DCA":{ "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/tts_models--de--thorsten--tacotron2-DCA.zip", - "default_vocoder": "vocoder_models/de/thorsten/wavegrad", + "default_vocoder": "vocoder_models/de/thorsten/fullband-melgan", "author": "@thorstenMueller", "commit": "unknown" } @@ -230,6 +230,11 @@ "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.0.11/vocoder_models--de--thorsten--wavegrad.zip", "author": "@thorstenMueller", "commit": "unknown" + }, + "fullband-melgan":{ + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.1.3/vocoder_models--de--thorsten--fullband-melgan.zip", + "author": "@thorstenMueller", + "commit": "unknown" + } } } diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 885d66b3..8c4d275f 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -1,80 +1,47 @@ import argparse -import glob import os +from argparse import RawTextHelpFormatter -import torch from tqdm import tqdm -from TTS.config import BaseDatasetConfig, load_config -from TTS.speaker_encoder.utils.generic_utils import setup_model +from TTS.config import load_config from TTS.tts.datasets import load_meta_data from TTS.tts.utils.speakers import SpeakerManager -from TTS.utils.audio import AudioProcessor parser = argparse.ArgumentParser( - description='Compute embedding vectors for each wav file in a dataset. If "target_dataset" is defined, it generates "speakers.json" necessary for training a multi-speaker model.' 
+ description="""Compute embedding vectors for each wav file in a dataset.\n\n""" + """ + Example runs: + python TTS/bin/compute_embeddings.py speaker_encoder_model.pth.tar speaker_encoder_config.json dataset_config.json embeddings_output_path/ + """, + formatter_class=RawTextHelpFormatter, ) -parser.add_argument("model_path", type=str, help="Path to model outputs (checkpoint, tensorboard etc.).") +parser.add_argument("model_path", type=str, help="Path to model checkpoint file.") parser.add_argument( "config_path", type=str, - help="Path to config file for training.", + help="Path to model config file.", ) -parser.add_argument("data_path", type=str, help="Data path for wav files - directory or CSV file") -parser.add_argument("output_path", type=str, help="path for output speakers.json.") + parser.add_argument( - "--target_dataset", + "config_dataset_path", type=str, - default="", - help="Target dataset to pick a processor from TTS.tts.dataset.preprocess. Necessary to create a speakers.json file.", + help="Path to dataset config file.", ) +parser.add_argument("output_path", type=str, help="path for output speakers.json and/or speakers.npy.") parser.add_argument("--use_cuda", type=bool, help="flag to set cuda.", default=True) -parser.add_argument("--separator", type=str, help="Separator used in file if CSV is passed for data_path", default="|") +parser.add_argument("--eval", type=bool, help="compute eval.", default=True) + args = parser.parse_args() +c_dataset = load_config(args.config_dataset_path) -c = load_config(args.config_path) -ap = AudioProcessor(**c["audio"]) +meta_data_train, meta_data_eval = load_meta_data(c_dataset.datasets, eval_split=args.eval) +wav_files = meta_data_train + meta_data_eval -data_path = args.data_path -split_ext = os.path.splitext(data_path) -sep = args.separator - -if args.target_dataset != "": - # if target dataset is defined - dataset_config = [ - BaseDatasetConfig(name=args.target_dataset, path=args.data_path, meta_file_train=None, meta_file_val=None), - ] - wav_files, _ = load_meta_data(dataset_config, eval_split=False) -else: - # if target dataset is not defined - if len(split_ext) > 0 and split_ext[1].lower() == ".csv": - # Parse CSV - print(f"CSV file: {data_path}") - with open(data_path) as f: - wav_path = os.path.join(os.path.dirname(data_path), "wavs") - wav_files = [] - print(f"Separator is: {sep}") - for line in f: - components = line.split(sep) - if len(components) != 2: - print("Invalid line") - continue - wav_file = os.path.join(wav_path, components[0] + ".wav") - # print(f'wav_file: {wav_file}') - if os.path.exists(wav_file): - wav_files.append(wav_file) - print(f"Count of wavs imported: {len(wav_files)}") - else: - # Parse all wav files in data_path - wav_files = glob.glob(data_path + "/**/*.wav", recursive=True) - -# define Encoder model -model = setup_model(c) -model.load_state_dict(torch.load(args.model_path)["model"]) -model.eval() -if args.use_cuda: - model.cuda() +speaker_manager = SpeakerManager( + encoder_model_path=args.model_path, encoder_config_path=args.config_path, use_cuda=args.use_cuda +) # compute speaker embeddings speaker_mapping = {} @@ -85,18 +52,14 @@ for idx, wav_file in enumerate(tqdm(wav_files)): else: speaker_name = None - mel_spec = ap.melspectrogram(ap.load_wav(wav_file, sr=ap.sample_rate)).T - mel_spec = torch.FloatTensor(mel_spec[None, :, :]) - if args.use_cuda: - mel_spec = mel_spec.cuda() - embedd = model.compute_embedding(mel_spec) - embedd = embedd.detach().cpu().numpy() + # extract the embedding + embedd = 
speaker_manager.compute_d_vector_from_clip(wav_file) # create speaker_mapping if target dataset is defined wav_file_name = os.path.basename(wav_file) speaker_mapping[wav_file_name] = {} speaker_mapping[wav_file_name]["name"] = speaker_name - speaker_mapping[wav_file_name]["embedding"] = embedd.flatten().tolist() + speaker_mapping[wav_file_name]["embedding"] = embedd if speaker_mapping: # save speaker_mapping if target dataset is defined @@ -104,8 +67,9 @@ if speaker_mapping: mapping_file_path = os.path.join(args.output_path, "speakers.json") else: mapping_file_path = args.output_path + os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True) - speaker_manager = SpeakerManager() + # pylint: disable=W0212 speaker_manager._save_json(mapping_file_path, speaker_mapping) print("Speaker embeddings saved at:", mapping_file_path) diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index b0159b86..1cbc5516 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -227,7 +227,7 @@ def main(args): # pylint: disable=redefined-outer-name ap = AudioProcessor(**c.audio) # load data instances - meta_data_train, meta_data_eval = load_meta_data(c.datasets) + meta_data_train, meta_data_eval = load_meta_data(c.datasets, eval_split=args.eval) # use eval and training partitions meta_data = meta_data_train + meta_data_eval @@ -271,6 +271,7 @@ if __name__ == "__main__": parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug") parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files") parser.add_argument("--quantized", action="store_true", help="Save quantized audio files") + parser.add_argument("--eval", type=bool, help="compute eval.", default=True) args = parser.parse_args() c = load_config(args.config_path) diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index 75169569..16768e43 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -1,40 +1,41 @@ """Find all the unique characters in a dataset""" import argparse -import os from argparse import RawTextHelpFormatter -from TTS.tts.datasets.formatters import get_preprocessor_by_name +from TTS.config import load_config +from TTS.tts.datasets import load_meta_data def main(): # pylint: disable=bad-option-value parser = argparse.ArgumentParser( description="""Find all the unique characters or phonemes in a dataset.\n\n""" - """Target dataset must be defined in TTS.tts.datasets.formatters\n\n""" """ Example runs: - python TTS/bin/find_unique_chars.py --dataset ljspeech --meta_file /path/to/LJSpeech/metadata.csv + python TTS/bin/find_unique_chars.py --config_path config.json """, formatter_class=RawTextHelpFormatter, ) - - parser.add_argument( - "--dataset", type=str, default="", help="One of the target dataset names in TTS.tts.datasets.formatters." 
- ) - - parser.add_argument("--meta_file", type=str, default=None, help="Path to the transcriptions file of the dataset.") - + parser.add_argument("--config_path", type=str, help="Path to dataset config file.", required=True) args = parser.parse_args() - preprocessor = get_preprocessor_by_name(args.dataset) - items = preprocessor(os.path.dirname(args.meta_file), os.path.basename(args.meta_file)) + c = load_config(args.config_path) + + # load all datasets + train_items, eval_items = load_meta_data(c.datasets, eval_split=True) + items = train_items + eval_items + texts = "".join(item[0] for item in items) chars = set(texts) lower_chars = filter(lambda c: c.islower(), chars) + chars_force_lower = [c.lower() for c in chars] + chars_force_lower = set(chars_force_lower) + print(f" > Number of unique characters: {len(chars)}") print(f" > Unique characters: {''.join(sorted(chars))}") print(f" > Unique lower characters: {''.join(sorted(lower_chars))}") + print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}") if __name__ == "__main__": diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index 38902a18..2bb5bfc7 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -164,7 +164,7 @@ def main(args): # pylint: disable=redefined-outer-name elif c.loss == "angleproto": criterion = AngleProtoLoss() elif c.loss == "softmaxproto": - criterion = SoftmaxAngleProtoLoss(c.model["proj_dim"], num_speakers) + criterion = SoftmaxAngleProtoLoss(c.model_params["proj_dim"], num_speakers) else: raise Exception("The %s not is a loss supported" % c.loss) diff --git a/TTS/server/server.py b/TTS/server/server.py index dc025b32..e90d93e6 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -103,7 +103,8 @@ synthesizer = Synthesizer( model_path, config_path, speakers_file_path, vocoder_path, vocoder_config_path, use_cuda=args.use_cuda ) -use_multi_speaker = synthesizer.speaker_manager is not None +use_multi_speaker = synthesizer.tts_model.speaker_manager is not None and synthesizer.tts_model.num_speakers > 1 +speaker_manager = synthesizer.tts_model.speaker_manager if hasattr(synthesizer.tts_model, "speaker_manager") else None # TODO: set this from SpeakerManager use_gst = synthesizer.tts_config.get("use_gst", False) app = Flask(__name__) @@ -134,7 +135,7 @@ def index(): "index.html", show_details=args.show_details, use_multi_speaker=use_multi_speaker, - speaker_ids=synthesizer.speaker_manager.speaker_ids if synthesizer.speaker_manager else None, + speaker_ids=speaker_manager.speaker_ids if speaker_manager is not None else None, use_gst=use_gst, ) diff --git a/TTS/speaker_encoder/models/lstm.py b/TTS/speaker_encoder/models/lstm.py index 05a56675..7e39087a 100644 --- a/TTS/speaker_encoder/models/lstm.py +++ b/TTS/speaker_encoder/models/lstm.py @@ -1,3 +1,4 @@ +import numpy as np import torch from torch import nn @@ -70,24 +71,32 @@ class LSTMSpeakerEncoder(nn.Module): d = torch.nn.functional.normalize(d, p=2, dim=1) return d - def compute_embedding(self, x, num_frames=160, overlap=0.5): + def compute_embedding(self, x, num_frames=250, num_eval=10, return_mean=True): """ Generate embeddings for a batch of utterances x: 1xTxD """ - num_overlap = int(num_frames * overlap) max_len = x.shape[1] - embed = None - cur_iter = 0 - for offset in range(0, max_len, num_frames - num_overlap): - cur_iter += 1 - end_offset = min(x.shape[1], offset + num_frames) + + if max_len < num_frames: + num_frames = max_len + + offsets = np.linspace(0, max_len - num_frames, 
num=num_eval) + + frames_batch = [] + for offset in offsets: + offset = int(offset) + end_offset = int(offset + num_frames) frames = x[:, offset:end_offset] - if embed is None: - embed = self.inference(frames) - else: - embed += self.inference(frames) - return embed / cur_iter + frames_batch.append(frames) + + frames_batch = torch.cat(frames_batch, dim=0) + embeddings = self.inference(frames_batch) + + if return_mean: + embeddings = torch.mean(embeddings, dim=0, keepdim=True) + + return embeddings def batch_compute_embedding(self, x, seq_lens, num_frames=160, overlap=0.5): """ @@ -110,9 +119,11 @@ class LSTMSpeakerEncoder(nn.Module): return embed / num_iters # pylint: disable=unused-argument, redefined-builtin - def load_checkpoint(self, config: dict, checkpoint_path: str, eval: bool = False): + def load_checkpoint(self, config: dict, checkpoint_path: str, eval: bool = False, use_cuda: bool = False): state = torch.load(checkpoint_path, map_location=torch.device("cpu")) self.load_state_dict(state["model"]) + if use_cuda: + self.cuda() if eval: self.eval() assert not self.training diff --git a/TTS/speaker_encoder/models/resnet.py b/TTS/speaker_encoder/models/resnet.py index ce86b01f..f52bb4d5 100644 --- a/TTS/speaker_encoder/models/resnet.py +++ b/TTS/speaker_encoder/models/resnet.py @@ -199,3 +199,12 @@ class ResNetSpeakerEncoder(nn.Module): embeddings = torch.mean(embeddings, dim=0, keepdim=True) return embeddings + + def load_checkpoint(self, config: dict, checkpoint_path: str, eval: bool = False, use_cuda: bool = False): + state = torch.load(checkpoint_path, map_location=torch.device("cpu")) + self.load_state_dict(state["model"]) + if use_cuda: + self.cuda() + if eval: + self.eval() + assert not self.training diff --git a/TTS/trainer.py b/TTS/trainer.py index c56be140..903aee5f 100644 --- a/TTS/trainer.py +++ b/TTS/trainer.py @@ -764,11 +764,11 @@ class Trainer: """Run test and log the results. Test run must be defined by the model. 
Model must return figures and audios to be logged by the Tensorboard.""" if hasattr(self.model, "test_run"): - if hasattr(self.eval_loader.load_test_samples): - samples = self.eval_loader.load_test_samples(1) - figures, audios = self.model.test_run(samples) + if hasattr(self.eval_loader.dataset, "load_test_samples"): + samples = self.eval_loader.dataset.load_test_samples(1) + figures, audios = self.model.test_run(self.ap, samples, None) else: - figures, audios = self.model.test_run() + figures, audios = self.model.test_run(self.ap) self.tb_logger.tb_test_audios(self.total_steps_done, audios, self.config.audio["sample_rate"]) self.tb_logger.tb_test_figures(self.total_steps_done, figures) @@ -790,7 +790,7 @@ class Trainer: self.train_epoch() if self.config.run_eval: self.eval_epoch() - if epoch >= self.config.test_delay_epochs and self.args.rank < 0: + if epoch >= self.config.test_delay_epochs and self.args.rank <= 0: self.test_run() self.c_logger.print_epoch_end( epoch, self.keep_avg_eval.avg_values if self.config.run_eval else self.keep_avg_train.avg_values diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index db7841f4..c057c51e 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -202,16 +202,20 @@ def libri_tts(root_path, meta_files=None): items = [] if meta_files is None: meta_files = glob(f"{root_path}/**/*trans.tsv", recursive=True) + else: + if isinstance(meta_files, str): + meta_files = [os.path.join(root_path, meta_files)] + for meta_file in meta_files: _meta_file = os.path.basename(meta_file).split(".")[0] - speaker_name = _meta_file.split("_")[0] - chapter_id = _meta_file.split("_")[1] - _root_path = os.path.join(root_path, f"{speaker_name}/{chapter_id}") with open(meta_file, "r") as ttf: for line in ttf: cols = line.split("\t") - wav_file = os.path.join(_root_path, cols[0] + ".wav") - text = cols[1] + file_name = cols[0] + speaker_name, chapter_id, *_ = cols[0].split("_") + _root_path = os.path.join(root_path, f"{speaker_name}/{chapter_id}") + wav_file = os.path.join(_root_path, file_name + ".wav") + text = cols[2] items.append([text, wav_file, "LTTS_" + speaker_name]) for item in items: assert os.path.exists(item[1]), f" [!] 
wav files don't exist - {item[1]}" @@ -288,6 +292,19 @@ def vctk_slim(root_path, meta_files=None, wavs_path="wav48"): return items +def mls(root_path, meta_files=None): + """http://www.openslr.org/94/""" + items = [] + with open(os.path.join(root_path, meta_files), "r") as meta: + for line in meta: + file, text = line.split("\t") + text = text[:-1] + speaker, book, *_ = file.split("_") + wav_file = os.path.join(root_path, os.path.dirname(meta_files), "audio", speaker, book, file + ".wav") + items.append([text, wav_file, "MLS_" + speaker]) + return items + + # ======================================== VOX CELEB =========================================== def voxceleb2(root_path, meta_file=None): """ diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index 86d34c30..07b58974 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -246,9 +246,9 @@ class Huber(nn.Module): class TacotronLoss(torch.nn.Module): """Collection of Tacotron set-up based on provided config.""" - def __init__(self, c, stopnet_pos_weight=10, ga_sigma=0.4): + def __init__(self, c, ga_sigma=0.4): super().__init__() - self.stopnet_pos_weight = stopnet_pos_weight + self.stopnet_pos_weight = c.stopnet_pos_weight self.ga_alpha = c.ga_alpha self.decoder_diff_spec_alpha = c.decoder_diff_spec_alpha self.postnet_diff_spec_alpha = c.postnet_diff_spec_alpha @@ -274,7 +274,7 @@ class TacotronLoss(torch.nn.Module): self.criterion_ssim = SSIMLoss() # stopnet loss # pylint: disable=not-callable - self.criterion_st = BCELossMasked(pos_weight=torch.tensor(stopnet_pos_weight)) if c.stopnet else None + self.criterion_st = BCELossMasked(pos_weight=torch.tensor(self.stopnet_pos_weight)) if c.stopnet else None def forward( self, @@ -284,6 +284,7 @@ class TacotronLoss(torch.nn.Module): linear_input, stopnet_output, stopnet_target, + stop_target_length, output_lens, decoder_b_output, alignments, @@ -315,12 +316,12 @@ class TacotronLoss(torch.nn.Module): return_dict["decoder_loss"] = decoder_loss return_dict["postnet_loss"] = postnet_loss - # stopnet loss stop_loss = ( - self.criterion_st(stopnet_output, stopnet_target, output_lens) if self.config.stopnet else torch.zeros(1) + self.criterion_st(stopnet_output, stopnet_target, stop_target_length) + if self.config.stopnet + else torch.zeros(1) ) - if not self.config.separate_stopnet and self.config.stopnet: - loss += stop_loss + loss += stop_loss return_dict["stopnet_loss"] = stop_loss # backward decoder loss (if enabled) diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index 2ec268d6..b36ed106 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -70,7 +70,7 @@ class BaseTTS(BaseModel): def get_aux_input(self, **kwargs) -> Dict: """Prepare and return `aux_input` used by `forward()`""" - pass + return {"speaker_id": None, "style_wav": None, "d_vector": None} def format_batch(self, batch: Dict) -> Dict: """Generic batch formatting for `TTSDataset`. @@ -119,9 +119,10 @@ class BaseTTS(BaseModel): ), f" [!] total duration {dur.sum()} vs spectrogram length {mel_lengths[idx]}" durations[idx, : text_lengths[idx]] = dur - # set stop targets view, we predict a single stop token per iteration. 
+ # set stop targets wrt reduction factor stop_targets = stop_targets.view(text_input.shape[0], stop_targets.size(1) // self.config.r, -1) stop_targets = (stop_targets.sum(2) > 0.0).unsqueeze(2).float().squeeze(2) + stop_target_lengths = torch.divide(mel_lengths, self.config.r).ceil_() return { "text_input": text_input, @@ -131,6 +132,7 @@ class BaseTTS(BaseModel): "mel_lengths": mel_lengths, "linear_input": linear_input, "stop_targets": stop_targets, + "stop_target_lengths": stop_target_lengths, "attn_mask": attn_mask, "durations": durations, "speaker_ids": speaker_ids, @@ -200,7 +202,7 @@ class BaseTTS(BaseModel): ) return loader - def test_run(self) -> Tuple[Dict, Dict]: + def test_run(self, ap) -> Tuple[Dict, Dict]: """Generic test run for `tts` models used by `Trainer`. You can override this for a different behaviour. @@ -212,14 +214,14 @@ class BaseTTS(BaseModel): test_audios = {} test_figures = {} test_sentences = self.config.test_sentences - aux_inputs = self._get_aux_inputs() + aux_inputs = self.get_aux_input() for idx, sen in enumerate(test_sentences): wav, alignment, model_outputs, _ = synthesis( - self.model, + self, sen, self.config, - self.use_cuda, - self.ap, + "cuda" in str(next(self.parameters()).device), + ap, speaker_id=aux_inputs["speaker_id"], d_vector=aux_inputs["d_vector"], style_wav=aux_inputs["style_wav"], @@ -229,6 +231,6 @@ class BaseTTS(BaseModel): ).values() test_audios["{}-audio".format(idx)] = wav - test_figures["{}-prediction".format(idx)] = plot_spectrogram(model_outputs, self.ap, output_fig=False) + test_figures["{}-prediction".format(idx)] = plot_spectrogram(model_outputs, ap, output_fig=False) test_figures["{}-alignment".format(idx)] = plot_alignment(alignment, output_fig=False) return test_figures, test_audios diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 9f235fad..b3bceb09 100755 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -113,7 +113,7 @@ class GlowTTS(BaseTTS): @staticmethod def compute_outputs(attn, o_mean, o_log_scale, x_mask): - """ Compute and format the mode outputs with the given alignment map""" + """Compute and format the mode outputs with the given alignment map""" y_mean = torch.matmul(attn.squeeze(1).transpose(1, 2), o_mean.transpose(1, 2)).transpose( 1, 2 ) # [b, t', t], [b, t, d] -> [b, d, t'] diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 95b4a358..7949ddf9 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -219,6 +219,7 @@ class Tacotron(BaseTacotron): mel_lengths = batch["mel_lengths"] linear_input = batch["linear_input"] stop_targets = batch["stop_targets"] + stop_target_lengths = batch["stop_target_lengths"] speaker_ids = batch["speaker_ids"] d_vectors = batch["d_vectors"] @@ -250,6 +251,7 @@ class Tacotron(BaseTacotron): linear_input, outputs["stop_tokens"], stop_targets, + stop_target_lengths, mel_lengths, outputs["decoder_outputs_backward"], outputs["alignments"], diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py index eaca3ff8..19619662 100644 --- a/TTS/tts/models/tacotron2.py +++ b/TTS/tts/models/tacotron2.py @@ -224,6 +224,7 @@ class Tacotron2(BaseTacotron): mel_lengths = batch["mel_lengths"] linear_input = batch["linear_input"] stop_targets = batch["stop_targets"] + stop_target_lengths = batch["stop_target_lengths"] speaker_ids = batch["speaker_ids"] d_vectors = batch["d_vectors"] @@ -255,6 +256,7 @@ class Tacotron2(BaseTacotron): linear_input, outputs["stop_tokens"], stop_targets, + 
stop_target_lengths, mel_lengths, outputs["decoder_outputs_backward"], outputs["alignments"], diff --git a/TTS/tts/utils/data.py b/TTS/tts/utils/data.py index 3ff52195..887f4376 100644 --- a/TTS/tts/utils/data.py +++ b/TTS/tts/utils/data.py @@ -27,10 +27,19 @@ def prepare_tensor(inputs, out_steps): return np.stack([_pad_tensor(x, pad_len) for x in inputs]) -def _pad_stop_target(x, length): - _pad = 0.0 +def _pad_stop_target(x: np.ndarray, length: int, pad_val=1) -> np.ndarray: + """Pad stop target array. + + Args: + x (np.ndarray): Stop target array. + length (int): Length after padding. + pad_val (int, optional): Padding value. Defaults to 1. + + Returns: + np.ndarray: Padded stop target array. + """ assert x.ndim == 1 - return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=_pad) + return np.pad(x, (0, length - x.shape[0]), mode="constant", constant_values=pad_val) def prepare_stop_target(inputs, out_steps): diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py index 8febcbbf..a8c9e0f6 100755 --- a/TTS/tts/utils/speakers.py +++ b/TTS/tts/utils/speakers.py @@ -59,6 +59,7 @@ class SpeakerManager: speaker_id_file_path: str = "", encoder_model_path: str = "", encoder_config_path: str = "", + use_cuda: bool = False, ): self.data_items = [] @@ -67,6 +68,7 @@ class SpeakerManager: self.clip_ids = [] self.speaker_encoder = None self.speaker_encoder_ap = None + self.use_cuda = use_cuda if data_items: self.speaker_ids, self.speaker_names, _ = self.parse_speakers_from_data(self.data_items) @@ -222,11 +224,11 @@ class SpeakerManager: """ self.speaker_encoder_config = load_config(config_path) self.speaker_encoder = setup_model(self.speaker_encoder_config) - self.speaker_encoder.load_checkpoint(config_path, model_path, True) + self.speaker_encoder.load_checkpoint(config_path, model_path, eval=True, use_cuda=self.use_cuda) self.speaker_encoder_ap = AudioProcessor(**self.speaker_encoder_config.audio) # normalize the input audio level and trim silences - self.speaker_encoder_ap.do_sound_norm = True - self.speaker_encoder_ap.do_trim_silence = True + # self.speaker_encoder_ap.do_sound_norm = True + # self.speaker_encoder_ap.do_trim_silence = True def compute_d_vector_from_clip(self, wav_file: Union[str, list]) -> list: """Compute a d_vector from a given audio file. @@ -242,6 +244,8 @@ class SpeakerManager: waveform = self.speaker_encoder_ap.load_wav(wav_file, sr=self.speaker_encoder_ap.sample_rate) spec = self.speaker_encoder_ap.melspectrogram(waveform) spec = torch.from_numpy(spec.T) + if self.use_cuda: + spec = spec.cuda() spec = spec.unsqueeze(0) d_vector = self.speaker_encoder.compute_embedding(spec) return d_vector @@ -272,6 +276,8 @@ class SpeakerManager: feats = torch.from_numpy(feats) if feats.ndim == 2: feats = feats.unsqueeze(0) + if self.use_cuda: + feats = feats.cuda() return self.speaker_encoder.compute_embedding(feats) def run_umap(self): diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py index d99fc417..05e0fae8 100644 --- a/TTS/vocoder/datasets/wavegrad_dataset.py +++ b/TTS/vocoder/datasets/wavegrad_dataset.py @@ -2,6 +2,7 @@ import glob import os import random from multiprocessing import Manager +from typing import List, Tuple import numpy as np import torch @@ -67,7 +68,19 @@ class WaveGradDataset(Dataset): item = self.load_item(idx) return item - def load_test_samples(self, num_samples): + def load_test_samples(self, num_samples: int) -> List[Tuple]: + """Return test samples. 
+ + Args: + num_samples (int): Number of samples to return. + + Returns: + List[Tuple]: melspectrogram and audio. + + Shapes: + - melspectrogram (Tensor): :math:`[C, T]` + - audio (Tensor): :math:`[T_audio]` + """ samples = [] return_segments = self.return_segments self.return_segments = False diff --git a/TTS/vocoder/models/__init__.py b/TTS/vocoder/models/__init__.py index 9479095e..7c209af4 100644 --- a/TTS/vocoder/models/__init__.py +++ b/TTS/vocoder/models/__init__.py @@ -31,7 +31,7 @@ def setup_model(config: Coqpit): def setup_generator(c): - """ TODO: use config object as arguments""" + """TODO: use config object as arguments""" print(" > Generator Model: {}".format(c.generator_model)) MyModel = importlib.import_module("TTS.vocoder.models." + c.generator_model.lower()) MyModel = getattr(MyModel, to_camel(c.generator_model)) @@ -94,7 +94,7 @@ def setup_generator(c): def setup_discriminator(c): - """ TODO: use config objekt as arguments""" + """TODO: use config object as arguments""" print(" > Discriminator Model: {}".format(c.discriminator_model)) if "parallel_wavegan" in c.discriminator_model: MyModel = importlib.import_module("TTS.vocoder.models.parallel_wavegan_discriminator") diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py index 03d5160e..d2983be2 100644 --- a/TTS/vocoder/models/wavegrad.py +++ b/TTS/vocoder/models/wavegrad.py @@ -124,11 +124,16 @@ class Wavegrad(BaseModel): @torch.no_grad() def inference(self, x, y_n=None): - """x: B x D X T""" + """ + Shapes: + x: :math:`[B, C, T]` + y_n: :math:`[B, 1, T]` + """ if y_n is None: - y_n = torch.randn(x.shape[0], 1, self.hop_len * x.shape[-1], dtype=torch.float32).to(x) + y_n = torch.randn(x.shape[0], 1, self.hop_len * x.shape[-1]) else: - y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0).to(x) + y_n = torch.FloatTensor(y_n).unsqueeze(0).unsqueeze(0) + y_n = y_n.type_as(x) sqrt_alpha_hat = self.noise_level.to(x) for n in range(len(self.alpha) - 1, -1, -1): y_n = self.c1[n] * (y_n - self.c2[n] * self.forward(y_n, x, sqrt_alpha_hat[n].repeat(x.shape[0]))) @@ -267,8 +272,10 @@ class Wavegrad(BaseModel): betas = np.linspace(noise_schedule["min_val"], noise_schedule["max_val"], noise_schedule["num_steps"]) self.compute_noise_level(betas) for sample in samples: - x = sample["input"] - y = sample["waveform"] + x = sample[0] + x = x[None, :, :].to(next(self.parameters()).device) + y = sample[1] + y = y[None, :] # compute voice y_pred = self.inference(x) # compute spectrograms diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index a5d89d5a..c2e47120 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -322,7 +322,7 @@ class Wavernn(BaseVocoder): with torch.no_grad(): if isinstance(mels, np.ndarray): - mels = torch.FloatTensor(mels).type_as(mels) + mels = torch.FloatTensor(mels).to(str(next(self.parameters()).device)) if mels.ndim == 2: mels = mels.unsqueeze(0) @@ -576,7 +576,8 @@ class Wavernn(BaseVocoder): figures = {} audios = {} for idx, sample in enumerate(samples): - x = sample["input"] + x = torch.FloatTensor(sample[0]) + x = x.to(next(self.parameters()).device) y_hat = self.inference(x, self.config.batched, self.config.target_samples, self.config.overlap_samples) x_hat = ap.melspectrogram(y_hat) figures.update( @@ -585,7 +586,7 @@ class Wavernn(BaseVocoder): f"test_{idx}/prediction": plot_spectrogram(x_hat.T), } ) - audios.update({f"test_{idx}/audio", y_hat}) + audios.update({f"test_{idx}/audio": y_hat}) return figures, audios @staticmethod diff 
--git a/docs/source/tts_datasets.md b/docs/source/tts_datasets.md index 6075bc95..852ccd37 100644 --- a/docs/source/tts_datasets.md +++ b/docs/source/tts_datasets.md @@ -11,6 +11,6 @@ Some of the known public datasets that we successfully applied 🐸TTS: - [Spanish](https://drive.google.com/file/d/1Sm_zyBo67XHkiFhcRSQ4YaHPYM0slO_e/view?usp=sharing) - thx! @carlfm01 - [German - Thorsten OGVD](https://github.com/thorstenMueller/deep-learning-german-tts) - [Japanese - Kokoro](https://www.kaggle.com/kaiida/kokoro-speech-dataset-v11-small/version/1) -- [Chinese](https://www.data-baker.com/open_source.html) +- [Chinese](https://www.data-baker.com/data/index/source/) Let us know if you use 🐸TTS on a different dataset. \ No newline at end of file diff --git a/hubconf.py b/hubconf.py index 96f12b5f..0c9c5930 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,5 +1,5 @@ dependencies = [ - 'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite` + 'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite' ] import torch diff --git a/notebooks/PlotUmapLibriTTS.ipynb b/notebooks/PlotUmapLibriTTS.ipynb index 0448f3df..ec20383f 100644 --- a/notebooks/PlotUmapLibriTTS.ipynb +++ b/notebooks/PlotUmapLibriTTS.ipynb @@ -13,11 +13,7 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 2, -======= "execution_count": null, ->>>>>>> dev "metadata": {}, "outputs": [], "source": [ @@ -29,15 +25,10 @@ "import umap\n", "\n", "from TTS.speaker_encoder.model import SpeakerEncoder\n", -<<<<<<< HEAD - "from TTS.utils.audio import AudioProcessor\n", - "from TTS.utils.io import load_config\n", -======= "from TTS.utils.audio import AudioProcessor \n", "from TTS.tts.utils.generic_utils import load_config\n", ->>>>>>> dev "\n", "from bokeh.io import output_notebook, show\n", "from bokeh.plotting import figure\n", @@ -59,331 +50,9 @@ }, { "cell_type": "code", -<<<<<<< HEAD - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/javascript": [ - "\n", - "(function(root) {\n", - " function now() {\n", - " return new Date();\n", - " }\n", - "\n", - " var force = true;\n", - "\n", - " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", - " root._bokeh_onload_callbacks = [];\n", - " root._bokeh_is_loading = undefined;\n", - " }\n", - "\n", - " var JS_MIME_TYPE = 'application/javascript';\n", - " var HTML_MIME_TYPE = 'text/html';\n", - " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", - " var CLASS_NAME = 'output_bokeh rendered_html';\n", - "\n", - " /**\n", - " * Render data to the DOM node\n", - " */\n", - " function render(props, node) {\n", - " var script = document.createElement(\"script\");\n", - " node.appendChild(script);\n", - " }\n", - "\n", - " /**\n", - " * Handle when an output is cleared or removed\n", - " */\n", - " function handleClearOutput(event, handle) {\n", - " var cell = handle.cell;\n", - "\n", - " var id = cell.output_area._bokeh_element_id;\n", - " var server_id = cell.output_area._bokeh_server_id;\n", - " // Clean up Bokeh references\n", - " if (id != null && id in Bokeh.index) {\n", - " Bokeh.index[id].model.document.clear();\n", - " delete Bokeh.index[id];\n", - " }\n", - "\n", - " if (server_id !== undefined) {\n", - " // Clean up Bokeh references\n", - " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", - " cell.notebook.kernel.execute(cmd, {\n", - " iopub: {\n", - " output: function(msg) {\n", - " var id = msg.content.text.trim();\n", - " if (id in Bokeh.index) {\n", - " Bokeh.index[id].model.document.clear();\n", - " delete Bokeh.index[id];\n", - " }\n", - " }\n", - " }\n", - " });\n", - " // Destroy server and session\n", - " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", - " cell.notebook.kernel.execute(cmd);\n", - " }\n", - " }\n", - "\n", - " /**\n", - " * Handle when a new output is added\n", - " */\n", - " function handleAddOutput(event, handle) {\n", - " var output_area = handle.output_area;\n", - " var output = handle.output;\n", - "\n", - " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", - " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", - " return\n", - " }\n", - "\n", - " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", - "\n", - " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", - " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", - " // store reference to embed id on output_area\n", - " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", - " }\n", - " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", - " var bk_div = document.createElement(\"div\");\n", - " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", - " var script_attrs = bk_div.children[0].attributes;\n", - " for (var i = 0; i < script_attrs.length; i++) {\n", - " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", - " }\n", - " // store reference to server id on output_area\n", - " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", - " }\n", - " }\n", - "\n", - " function register_renderer(events, OutputArea) {\n", - "\n", - " 
function append_mime(data, metadata, element) {\n", - " // create a DOM node to render to\n", - " var toinsert = this.create_output_subarea(\n", - " metadata,\n", - " CLASS_NAME,\n", - " EXEC_MIME_TYPE\n", - " );\n", - " this.keyboard_manager.register_events(toinsert);\n", - " // Render to node\n", - " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", - " render(props, toinsert[toinsert.length - 1]);\n", - " element.append(toinsert);\n", - " return toinsert\n", - " }\n", - "\n", - " /* Handle when an output is cleared or removed */\n", - " events.on('clear_output.CodeCell', handleClearOutput);\n", - " events.on('delete.Cell', handleClearOutput);\n", - "\n", - " /* Handle when a new output is added */\n", - " events.on('output_added.OutputArea', handleAddOutput);\n", - "\n", - " /**\n", - " * Register the mime type and append_mime function with output_area\n", - " */\n", - " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", - " /* Is output safe? */\n", - " safe: true,\n", - " /* Index of renderer in `output_area.display_order` */\n", - " index: 0\n", - " });\n", - " }\n", - "\n", - " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", - " if (root.Jupyter !== undefined) {\n", - " var events = require('base/js/events');\n", - " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", - "\n", - " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", - " register_renderer(events, OutputArea);\n", - " }\n", - " }\n", - "\n", - " \n", - " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", - " root._bokeh_timeout = Date.now() + 5000;\n", - " root._bokeh_failed_load = false;\n", - " }\n", - "\n", - " var NB_LOAD_WARNING = {'data': {'text/html':\n", - " \"\\n\"+\n", - " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", - " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", - " \"
\\n\"+\n", - " \"\\n\"+\n",
- " \"from bokeh.resources import INLINE\\n\"+\n",
- " \"output_notebook(resources=INLINE)\\n\"+\n",
- " \"
\\n\"+\n",