diff --git a/.compute b/.compute
index de3589ae..5ad456b8 100644
--- a/.compute
+++ b/.compute
@@ -4,16 +4,13 @@ yes | apt-get install ffmpeg
 yes | apt-get install espeak
 yes | apt-get install tmux
 yes | apt-get install zsh
-# pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
-# wget https://www.dropbox.com/s/m8waow6b3ydpf6h/MozillaDataset.tar.gz?dl=0 -O /data/rw/home/mozilla.tar
-wget https://www.dropbox.com/s/wqn5v3wkktw9lmo/install.sh?dl=0 -O install.sh
+pip3 install https://download.pytorch.org/whl/cu100/torch-1.3.0%2Bcu100-cp36-cp36m-linux_x86_64.whl
 sudo sh install.sh
+pip3 install torch==1.3.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
 python3 setup.py develop
-# cp -R ${USER_DIR}/GermanData ../tmp/
-# cp -R /data/ro/shared/data/keithito/LJSpeech-1.1/ ../tmp/
 # python3 distribute.py --config_path config.json --data_path /data/ro/shared/data/keithito/LJSpeech-1.1/
 # cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
 # python3 distribute.py --config_path config_tacotron_gst.json --data_path ../tmp/Mozilla_22050/
 # python3 distribute.py --config_path config.json --data_path /data/rw/home/LibriTTS/train-clean-360
-python3 distribute.py --config_path config.json
+# python3 distribute.py --config_path config.json
 while true; do sleep 1000000; done
diff --git a/config.json b/config.json
index 9cc4b222..1226e1ac 100644
--- a/config.json
+++ b/config.json
@@ -1,6 +1,6 @@
 {
     "run_name": "ljspeech",
-    "run_description": "Tacotron2 ljspeech release training",
+    "run_description": "Tacotron ljspeech release training",
 
     "audio":{
         // Audio processing parameters
@@ -31,7 +31,7 @@
 
     "reinit_layers": [],
 
-    "model": "Tacotron2",          // one of the model in models/
+    "model": "Tacotron",           // one of the models in models/
     "grad_clip": 1,                // upper limit for gradients for clipping.
     "epochs": 1000,                // total number of epochs to train.
     "lr": 0.0001,                  // Initial learning rate. If Noam decay is active, maximum learning rate.
diff --git a/layers/losses.py b/layers/losses.py
index a6bf95d3..6ccb3986 100644
--- a/layers/losses.py
+++ b/layers/losses.py
@@ -1,3 +1,5 @@
+import numpy as np
+import torch
 from torch import nn
 from torch.nn import functional
 from TTS.utils.generic_utils import sequence_mask
@@ -53,3 +55,17 @@ class MSELossMasked(nn.Module):
             x * mask, target * mask, reduction="sum")
         loss = loss / mask.sum()
         return loss
+
+
+class AttentionEntropyLoss(nn.Module):
+    def forward(self, align):
+        """
+        Forces attention to be more decisive by penalizing
+        soft attention weights.
+
+        TODO: arguments
+        TODO: unit_test
+        """
+        entropy = torch.distributions.Categorical(probs=align).entropy()
+        loss = (entropy / np.log(align.shape[1])).mean()
+        return loss
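
A quick way to confirm that the `.compute` steps above left the intended build installed (a minimal sketch, not part of the diff; PyTorch reports its CUDA variant in the version string):

    import torch

    # The wheel pinned in .compute is torch 1.3.0 built against CUDA 10.0,
    # which torch reports as a local build tag in its version string.
    print(torch.__version__)          # expected: 1.3.0+cu100
    print(torch.cuda.is_available())  # True if a compatible CUDA driver is present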
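On the new `AttentionEntropyLoss`: it treats each row of the alignment matrix as a categorical distribution over encoder steps, computes its entropy, and divides by `np.log(align.shape[1])`, so uniform (maximally indecisive) attention scores about 1 while one-hot (fully decisive) attention scores 0. Note the normalizer equals the true maximum entropy, log(number of encoder steps), only when the two time axes match. A minimal usage sketch, assuming `align` has shape `(batch, decoder_steps, encoder_steps)` with rows summing to one:

    import numpy as np
    import torch
    from torch import nn


    class AttentionEntropyLoss(nn.Module):
        # Same class as added to layers/losses.py in the diff above.
        def forward(self, align):
            entropy = torch.distributions.Categorical(probs=align).entropy()
            loss = (entropy / np.log(align.shape[1])).mean()
            return loss


    # Hypothetical alignment tensor: one distribution over encoder steps per
    # decoder step; square time axes, so the [0, 1] normalization is exact.
    batch, dec_steps, enc_steps = 8, 50, 50
    soft = torch.softmax(torch.randn(batch, dec_steps, enc_steps), dim=-1)
    peaky = torch.zeros(batch, dec_steps, enc_steps)
    peaky[..., 0] = 1.0  # one-hot: every decoder step attends to a single input

    criterion = AttentionEntropyLoss()
    print(criterion(soft).item())   # high (near 1): diffuse attention is penalized
    print(criterion(peaky).item())  # ~0: decisive attention incurs no penalty

The diff does not show where this term is wired into training; presumably it would be added to the existing decoder and stopnet losses with a small weight, but that integration is not part of this change.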