diff --git a/.compute b/.compute
index de3589ae..5ad456b8 100644
--- a/.compute
+++ b/.compute
@@ -4,16 +4,13 @@ yes | apt-get install ffmpeg
 yes | apt-get install espeak
 yes | apt-get install tmux
 yes | apt-get install zsh
-# pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp37-cp37m-linux_x86_64.whl
-# wget https://www.dropbox.com/s/m8waow6b3ydpf6h/MozillaDataset.tar.gz?dl=0 -O /data/rw/home/mozilla.tar
-wget https://www.dropbox.com/s/wqn5v3wkktw9lmo/install.sh?dl=0 -O install.sh
+pip3 install https://download.pytorch.org/whl/cu100/torch-1.3.0%2Bcu100-cp36-cp36m-linux_x86_64.whl
 sudo sh install.sh
+pip3 install torch==1.3.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
 python3 setup.py develop
-# cp -R ${USER_DIR}/GermanData ../tmp/
-# cp -R /data/ro/shared/data/keithito/LJSpeech-1.1/ ../tmp/
 # python3 distribute.py --config_path config.json --data_path /data/ro/shared/data/keithito/LJSpeech-1.1/
 # cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
 # python3 distribute.py --config_path config_tacotron_gst.json --data_path ../tmp/Mozilla_22050/
 # python3 distribute.py --config_path config.json --data_path /data/rw/home/LibriTTS/train-clean-360
-python3 distribute.py --config_path config.json
+# python3 distribute.py --config_path config.json
 while true; do sleep 1000000; done
diff --git a/config.json b/config.json
index 9cc4b222..1226e1ac 100644
--- a/config.json
+++ b/config.json
@@ -1,6 +1,6 @@
 {
     "run_name": "ljspeech",
-    "run_description": "Tacotron2 ljspeech release training",
+    "run_description": "Tacotron ljspeech release training",
 
     "audio":{
         // Audio processing parameters
@@ -31,7 +31,7 @@
 
     "reinit_layers": [],
 
-    "model": "Tacotron2",          // one of the model in models/
+    "model": "Tacotron",           // one of the models in models/
     "grad_clip": 1,                // upper limit for gradients for clipping.
     "epochs": 1000,                // total number of epochs to train.
     "lr": 0.0001,                  // Initial learning rate. If Noam decay is active, maximum learning rate.
diff --git a/layers/losses.py b/layers/losses.py
index a6bf95d3..6ccb3986 100644
--- a/layers/losses.py
+++ b/layers/losses.py
@@ -1,3 +1,5 @@
+import numpy as np
+import torch
 from torch import nn
 from torch.nn import functional
 from TTS.utils.generic_utils import sequence_mask
@@ -53,3 +55,17 @@ class MSELossMasked(nn.Module):
             x * mask, target * mask, reduction="sum")
         loss = loss / mask.sum()
         return loss
+
+
+class AttentionEntropyLoss(nn.Module):
+    def forward(self, align):
+        """
+        Forces attention to be more decisive by penalizing
+        soft attention weights.
+
+        TODO: arguments
+        TODO: unit_test
+        """
+        entropy = torch.distributions.Categorical(probs=align).entropy()
+        loss = (entropy / np.log(align.shape[1])).mean()
+        return loss
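
A quick way to confirm that the `.compute` steps above left the intended build installed (a minimal sketch, not part of the diff; PyTorch reports its CUDA variant in the version string):

    import torch

    # The wheel pinned in .compute is torch 1.3.0 built against CUDA 10.0,
    # which torch reports as a local build tag in its version string.
    print(torch.__version__)          # expected: 1.3.0+cu100
    print(torch.cuda.is_available())  # True if a compatible CUDA driver is present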
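On the new `AttentionEntropyLoss`: it treats each row of the alignment matrix as a categorical distribution over encoder steps, computes its entropy, and divides by `np.log(align.shape[1])`, so uniform (maximally indecisive) attention scores about 1 while one-hot (fully decisive) attention scores 0. Note the normalizer equals the true maximum entropy, log(number of encoder steps), only when the two time axes match. A minimal usage sketch, assuming `align` has shape `(batch, decoder_steps, encoder_steps)` with rows summing to one:

    import numpy as np
    import torch
    from torch import nn


    class AttentionEntropyLoss(nn.Module):
        # Same class as added to layers/losses.py in the diff above.
        def forward(self, align):
            entropy = torch.distributions.Categorical(probs=align).entropy()
            loss = (entropy / np.log(align.shape[1])).mean()
            return loss


    # Hypothetical alignment tensor: one distribution over encoder steps per
    # decoder step; square time axes, so the [0, 1] normalization is exact.
    batch, dec_steps, enc_steps = 8, 50, 50
    soft = torch.softmax(torch.randn(batch, dec_steps, enc_steps), dim=-1)
    peaky = torch.zeros(batch, dec_steps, enc_steps)
    peaky[..., 0] = 1.0  # one-hot: every decoder step attends to a single input

    criterion = AttentionEntropyLoss()
    print(criterion(soft).item())   # high (near 1): diffuse attention is penalized
    print(criterion(peaky).item())  # ~0: decisive attention incurs no penalty

The diff does not show where this term is wired into training; presumably it would be added to the existing decoder and stopnet losses with a small weight, but that integration is not part of this change.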