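Bug fixes: attention layer constructors, decoder attention input shape, loss import, and experiment folder timestamp format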

This commit is contained in:
Eren G 2018-07-13 15:24:50 +02:00
parent a211766a10
commit b54da3b460
6 changed files with 13 additions and 10 deletions

View File

@ -1,3 +1,3 @@
#!/bin/bash #!/bin/bash
source ../tmp/venv/bin/activate source ../tmp/venv/bin/activate
python train.py --config_path config.json python train.py --config_path config.json --debug true

View File

@ -1,11 +1,13 @@
{ {
"model_name": "best-model", "model_name": "audio-update-l2-loss",
"num_mels": 80, "num_mels": 80,
"num_freq": 1025, "num_freq": 1025,
"sample_rate": 20000, "sample_rate": 20000,
"frame_length_ms": 50, "frame_length_ms": 50,
"frame_shift_ms": 12.5, "frame_shift_ms": 12.5,
"preemphasis": 0.97, "preemphasis": 0.97,
"min_mel_freq": 125,
"max_mel_freq": 7600,
"min_level_db": -100, "min_level_db": -100,
"ref_level_db": 20, "ref_level_db": 20,
"embedding_size": 256, "embedding_size": 256,
@ -25,7 +27,7 @@
"checkpoint": true, "checkpoint": true,
"save_step": 376, "save_step": 376,
"print_step": 50, "print_step": 10,
"data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/", "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
"min_seq_len": 0, "min_seq_len": 0,
"output_path": "experiments/" "output_path": "experiments/"

View File

@ -38,11 +38,11 @@ class LocationSensitiveAttention(nn.Module):
super(LocationSensitiveAttention, self).__init__() super(LocationSensitiveAttention, self).__init__()
self.kernel_size = kernel_size self.kernel_size = kernel_size
self.filters = filters self.filters = filters
padding = int((kernel - 1) / 2) padding = int((kernel_size - 1) / 2)
self.loc_conv = nn.Conv1d(2, filters, self.loc_conv = nn.Conv1d(1, filters,
kernel_size=kernel_size, stride=1, kernel_size=kernel_size, stride=1,
padding=padding, bias=False) padding=padding, bias=False)
self.loc_linear = nn.Linear(loc_dim, hidden_dim) self.loc_linear = nn.Linear(filters, hidden_dim)
self.query_layer = nn.Linear(query_dim, hidden_dim, bias=True) self.query_layer = nn.Linear(query_dim, hidden_dim, bias=True)
self.annot_layer = nn.Linear(annot_dim, hidden_dim, bias=True) self.annot_layer = nn.Linear(annot_dim, hidden_dim, bias=True)
self.v = nn.Linear(hidden_dim, 1, bias=False) self.v = nn.Linear(hidden_dim, 1, bias=False)
@ -79,7 +79,7 @@ class AttentionRNNCell(nn.Module):
memory_dim (int): memory vector (decoder autoregression) feature dimension. memory_dim (int): memory vector (decoder autoregression) feature dimension.
align_model (str): 'b' for Bahdanau, 'ls' Location Sensitive alignment. align_model (str): 'b' for Bahdanau, 'ls' Location Sensitive alignment.
""" """
super(AttentionRNN, self).__init__() super(AttentionRNNCell, self).__init__()
self.align_model = align_model self.align_model = align_model
self.rnn_cell = nn.GRUCell(out_dim + memory_dim, out_dim) self.rnn_cell = nn.GRUCell(out_dim + memory_dim, out_dim)
# pick bahdanau or location sensitive attention # pick bahdanau or location sensitive attention

View File

@ -275,7 +275,7 @@ class Decoder(nn.Module):
# dim=1) # dim=1)
attention_rnn_hidden, current_context_vec, attention = self.attention_rnn( attention_rnn_hidden, current_context_vec, attention = self.attention_rnn(
processed_memory, current_context_vec, attention_rnn_hidden, processed_memory, current_context_vec, attention_rnn_hidden,
inputs, attention, input_lens) inputs, attention.unsqueeze(1), input_lens)
# attention_cum += attention # attention_cum += attention
# Concat RNN output and attention context vector # Concat RNN output and attention context vector
decoder_input = self.project_to_decoder_in( decoder_input = self.project_to_decoder_in(

View File

@ -25,7 +25,7 @@ from utils.generic_utils import (Progbar, remove_experiment_folder,
from utils.visual import plot_alignment, plot_spectrogram from utils.visual import plot_alignment, plot_spectrogram
from datasets.LJSpeech import LJSpeechDataset from datasets.LJSpeech import LJSpeechDataset
from models.tacotron import Tacotron from models.tacotron import Tacotron
from layers.losses import L2LossMasked from layers.losses import L1LossMasked
torch.manual_seed(1) torch.manual_seed(1)
use_cuda = torch.cuda.is_available() use_cuda = torch.cuda.is_available()
@ -338,6 +338,7 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step):
def main(args): def main(args):
# Setup the dataset
# Setup the dataset # Setup the dataset
train_dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata_train.csv'), train_dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata_train.csv'),
os.path.join(c.data_path, 'wavs'), os.path.join(c.data_path, 'wavs'),

View File

@ -37,7 +37,7 @@ def get_commit_hash():
def create_experiment_folder(root_path, model_name, debug): def create_experiment_folder(root_path, model_name, debug):
""" Create a folder with the current date and time """ """ Create a folder with the current date and time """
date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I:%M%p") date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p")
if debug: if debug:
commit_hash = 'debug' commit_hash = 'debug'
else: else: