diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py
index 56a2b954..dae3ebac 100644
--- a/TTS/bin/train_encoder.py
+++ b/TTS/bin/train_encoder.py
@@ -46,6 +46,7 @@ def setup_loader(ap, is_val=False, verbose=False):
                             skip_speakers=False,
                             storage_size=c.storage["storage_size"],
                             sample_from_storage_p=c.storage["sample_from_storage_p"],
+                            additive_noise=c.storage["additive_noise"],
                             verbose=verbose)
         # sampler = DistributedSampler(dataset) if num_gpus > 1 else None
         loader = DataLoader(dataset,
diff --git a/TTS/speaker_encoder/config.json b/TTS/speaker_encoder/config.json
index f350779d..d7c959cf 100644
--- a/TTS/speaker_encoder/config.json
+++ b/TTS/speaker_encoder/config.json
@@ -27,7 +27,7 @@
         "trim_db": 60 // threshold for timming silence. Set this according to your dataset.
     },
     "reinit_layers": [],
-    "loss": "ge2e", // "ge2e" to use Generalized End-to-End loss and "angleproto" to use Angular Prototypical loss (new SOTA)
+    "loss": "angleproto", // "ge2e" to use Generalized End-to-End loss and "angleproto" to use Angular Prototypical loss (new SOTA)
     "grad_clip": 3.0, // upper limit for gradients for clipping.
     "epochs": 1000, // total number of epochs to train.
     "lr": 0.0001, // Initial learning rate. If Noam decay is active, maximum learning rate.
@@ -35,12 +35,12 @@
     "warmup_steps": 4000, // Noam decay steps to increase the learning rate from 0 to "lr"
     "tb_model_param_stats": false, // true, plots param stats per layer on tensorboard. Might be memory consuming, but good for debugging.
     "steps_plot_stats": 10, // number of steps to plot embeddings.
-    "num_speakers_in_batch": 32, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
+    "num_speakers_in_batch": 64, // Batch size for training. Lower values than 32 might cause hard to learn attention. It is overwritten by 'gradual_training'.
     "num_loader_workers": 4, // number of training data loader processes. Don't set it too big. 4-8 are good values.
     "wd": 0.000001, // Weight decay weight.
     "checkpoint": true, // If true, it saves checkpoints per "save_step"
     "save_step": 1000, // Number of training steps expected to save traning stats and checkpoints.
-    "print_step": 1, // Number of steps to log traning on console.
+    "print_step": 20, // Number of steps to log traning on console.
     "output_path": "../../MozillaTTSOutput/checkpoints/voxceleb_librispeech/speaker_encoder/", // DATASET-RELATED: output path for all training outputs.
     "model": {
         "input_dim": 40,
@@ -51,7 +51,8 @@
     },
     "storage": {
         "sample_from_storage_p": 0.42, // the probability with which we'll sample from the DataSet in-memory storage
-        "storage_size": 5 // the size of the in-memory storage with respect to a single batch
+        "storage_size": 5, // the size of the in-memory storage with respect to a single batch
+        "additive_noise": 1e-5 // add very small gaussian noise to the data in order to increase robustness
     },

     "datasets": [
@@ -60,42 +61,42 @@
             "path": "../../audio-datasets/en/VCTK-Corpus/",
             "meta_file_train": null,
             "meta_file_val": null
+        },
+        {
+            "name": "libri_tts",
+            "path": "../../audio-datasets/en/LibriTTS/train-clean-100",
+            "meta_file_train": null,
+            "meta_file_val": null
+        },
+        {
+            "name": "libri_tts",
+            "path": "../../audio-datasets/en/LibriTTS/train-clean-360",
+            "meta_file_train": null,
+            "meta_file_val": null
+        },
+        {
+            "name": "libri_tts",
+            "path": "../../audio-datasets/en/LibriTTS/train-other-500",
+            "meta_file_train": null,
+            "meta_file_val": null
+        },
+        {
+            "name": "voxceleb1",
+            "path": "../../audio-datasets/en/voxceleb1/",
+            "meta_file_train": null,
+            "meta_file_val": null
+        },
+        {
+            "name": "voxceleb2",
+            "path": "../../audio-datasets/en/voxceleb2/",
+            "meta_file_train": null,
+            "meta_file_val": null
+        },
+        {
+            "name": "common_voice_wav",
+            "path": "../../audio-datasets/en/MozillaCommonVoice",
+            "meta_file_train": "train.tsv",
+            "meta_file_val": "test.tsv"
+        }
-//        {
-//            "name": "libri_tts",
-//            "path": "../../audio-datasets/en/LibriTTS/train-clean-100",
-//            "meta_file_train": null,
-//            "meta_file_val": null
-//        },
-//        {
-//            "name": "libri_tts",
-//            "path": "../../audio-datasets/en/LibriTTS/train-clean-360",
-//            "meta_file_train": null,
-//            "meta_file_val": null
-//        },
-//        {
-//            "name": "libri_tts",
-//            "path": "../../audio-datasets/en/LibriTTS/train-other-500",
-//            "meta_file_train": null,
-//            "meta_file_val": null
-//        },
-//        {
-//            "name": "voxceleb1",
-//            "path": "../../audio-datasets/en/voxceleb1/",
-//            "meta_file_train": null,
-//            "meta_file_val": null
-//        },
-//        {
-//            "name": "voxceleb2",
-//            "path": "../../audio-datasets/en/voxceleb2/",
-//            "meta_file_train": null,
-//            "meta_file_val": null
-//        },
-//        {
-//            "name": "common_voice_wav",
-//            "path": "../../audio-datasets/en/MozillaCommonVoice",
-//            "meta_file_train": "train.tsv",
-//            "meta_file_val": "test.tsv"
-//        }
     ]
 }
\ No newline at end of file
diff --git a/TTS/speaker_encoder/dataset.py b/TTS/speaker_encoder/dataset.py
index 3f3db88d..05709080 100644
--- a/TTS/speaker_encoder/dataset.py
+++ b/TTS/speaker_encoder/dataset.py
@@ -1,3 +1,4 @@
+import numpy
 import numpy as np
 import queue
 import torch
@@ -8,7 +9,7 @@ from tqdm import tqdm

 class MyDataset(Dataset):
     def __init__(self, ap, meta_data, voice_len=1.6, num_speakers_in_batch=64,
-                 storage_size=1, sample_from_storage_p=0.5,
+                 storage_size=1, sample_from_storage_p=0.5, additive_noise=0,
                  num_utter_per_speaker=10, skip_speakers=False, verbose=False):
         """
         Args:
@@ -29,6 +30,7 @@ class MyDataset(Dataset):
         self.__parse_items()
         self.storage = queue.Queue(maxsize=storage_size*num_speakers_in_batch)
         self.sample_from_storage_p = float(sample_from_storage_p)
+        self.additive_noise = float(additive_noise)
         if self.verbose:
             print("\n > DataLoader initialization")
             print(f" | > Storage Size: {self.storage.maxsize} speakers, each with {num_utter_per_speaker} utters")
@@ -150,6 +152,11 @@ class MyDataset(Dataset):
                 # put the newly loaded item into storage
                 self.storage.put_nowait((wavs_, labels_))

+            # add random gaussian noise
+            if self.additive_noise > 0:
+                noises_ = [numpy.random.normal(0, self.additive_noise, size=len(w)) for w in wavs_]
+                wavs_ = [wavs_[i] + noises_[i] for i in range(len(wavs_))]
+
             # get a random subset of each of the wavs and convert to MFCC.
             offsets_ = [random.randint(0, wav.shape[0] - self.seq_len) for wav in wavs_]
             mels_ = [self.ap.melspectrogram(wavs_[i][offsets_[i]: offsets_[i] + self.seq_len]) for i in range(len(wavs_))]
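# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): what the new
# "storage.additive_noise" option does. Zero-mean Gaussian noise with a very
# small standard deviation (1e-5 in the config above) is added to each raw
# waveform before mel spectrogram extraction, as a cheap robustness
# augmentation. The function name and example data below are hypothetical.
# ---------------------------------------------------------------------------
import numpy as np

def apply_additive_noise(wavs, additive_noise=1e-5):
    """Add zero-mean Gaussian noise to each waveform, as in the diff above."""
    if additive_noise <= 0:
        return wavs
    return [w + np.random.normal(0.0, additive_noise, size=len(w)) for w in wavs]

# Example usage with three fake 1-second waveforms at 16 kHz.
wavs = [np.random.uniform(-0.1, 0.1, size=16000) for _ in range(3)]
noisy_wavs = apply_additive_noise(wavs, additive_noise=1e-5)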
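# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): how the in-memory storage that
# "sample_from_storage_p" and "storage_size" configure behaves. The dataset
# itself uses a queue.Queue sized storage_size * num_speakers_in_batch (see
# the dataset.py hunk above); the deque-based helper below is a simplified,
# hypothetical stand-in, not the project's exact implementation.
# ---------------------------------------------------------------------------
import random
from collections import deque

storage_size = 5               # "storage_size" from the config above
num_speakers_in_batch = 64     # "num_speakers_in_batch" from the config above
sample_from_storage_p = 0.42   # "sample_from_storage_p" from the config above

storage = deque(maxlen=storage_size * num_speakers_in_batch)

def get_utterances(load_from_disk):
    """Return one speaker's utterances, reusing stored ones with some probability."""
    if len(storage) == storage.maxlen and random.random() < sample_from_storage_p:
        # reuse utterances loaded earlier instead of reading from disk again
        return random.choice(storage)
    item = load_from_disk()
    storage.append(item)       # when full, the oldest entry is dropped automatically
    return item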