mirror of https://github.com/coqui-ai/TTS.git

Bug fixes

parent 0ef3c0ac3f
commit adbe603af1

.compute
@@ -1,3 +1,3 @@
 #!/bin/bash
 source ../tmp/venv/bin/activate
-python train.py --config_path config.json
+python train.py --config_path config.json --debug true
config.json

@@ -1,11 +1,13 @@
 {
-    "model_name": "best-model",
+    "model_name": "audio-update-l2-loss",
     "num_mels": 80,
     "num_freq": 1025,
     "sample_rate": 20000,
     "frame_length_ms": 50,
     "frame_shift_ms": 12.5,
     "preemphasis": 0.97,
+    "min_mel_freq": 125,
+    "max_mel_freq": 7600,
     "min_level_db": -100,
     "ref_level_db": 20,
     "embedding_size": 256,
@@ -25,7 +27,7 @@
     "checkpoint": true,
     "save_step": 376,
-    "print_step": 50,
+    "print_step": 10,
     "data_path": "/snakepit/shared/data/keithito/LJSpeech-1.1/",
     "min_seq_len": 0,
     "output_path": "experiments/"
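For orientation, here is a minimal sketch of how a flat JSON config like this can be loaded so train.py can use c.data_path, c.num_mels, and so on. The repo keeps its own loader in utils.generic_utils; the AttrDict helper below is an illustrative assumption, not that loader.

import json

class AttrDict(dict):
    """Expose JSON keys as attributes: c.num_mels instead of c['num_mels']."""
    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError as e:
            raise AttributeError(key) from e

def load_config(config_path):
    # Parse the flat JSON file shown in the diff above.
    with open(config_path, "r") as f:
        return AttrDict(json.load(f))

# Usage (assuming config.json exists in the working directory):
# c = load_config("config.json")
# print(c.model_name, c.sample_rate, c.print_step)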
layers/attention.py

@@ -38,11 +38,11 @@ class LocationSensitiveAttention(nn.Module):
         super(LocationSensitiveAttention, self).__init__()
         self.kernel_size = kernel_size
         self.filters = filters
-        padding = int((kernel - 1) / 2)
-        self.loc_conv = nn.Conv1d(2, filters,
+        padding = int((kernel_size - 1) / 2)
+        self.loc_conv = nn.Conv1d(1, filters,
                                   kernel_size=kernel_size, stride=1,
                                   padding=padding, bias=False)
-        self.loc_linear = nn.Linear(loc_dim, hidden_dim)
+        self.loc_linear = nn.Linear(filters, hidden_dim)
         self.query_layer = nn.Linear(query_dim, hidden_dim, bias=True)
         self.annot_layer = nn.Linear(annot_dim, hidden_dim, bias=True)
         self.v = nn.Linear(hidden_dim, 1, bias=False)
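A note on the padding fix, as a standalone sketch rather than the repo's module: the old code referenced an undefined name (kernel) and sized loc_linear with loc_dim instead of the convolution's output channels. With an odd kernel_size, padding = (kernel_size - 1) / 2 is "same" padding, so the location convolution preserves the time dimension; its output channel count is filters, which is exactly what nn.Linear(filters, hidden_dim) expects. Dimensions below are illustrative.

import torch
import torch.nn as nn

kernel_size, filters, T = 31, 32, 128
padding = int((kernel_size - 1) / 2)       # "same" padding for an odd kernel
loc_conv = nn.Conv1d(1, filters, kernel_size=kernel_size,
                     stride=1, padding=padding, bias=False)

x = torch.rand(4, 1, T)                    # (batch, channels=1, time)
y = loc_conv(x)
print(y.shape)                             # torch.Size([4, 32, 128]): time preserved,
                                           # channels == filters for nn.Linear(filters, ...)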
@@ -79,7 +79,7 @@ class AttentionRNNCell(nn.Module):
             memory_dim (int): memory vector (decoder autoregression) feature dimension.
             align_model (str): 'b' for Bahdanau, 'ls' for Location Sensitive alignment.
         """
-        super(AttentionRNN, self).__init__()
+        super(AttentionRNNCell, self).__init__()
         self.align_model = align_model
         self.rnn_cell = nn.GRUCell(out_dim + memory_dim, out_dim)
         # pick bahdanau or location sensitive attention
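The super() fix looks like rename cleanup: the cell was presumably renamed from AttentionRNN to AttentionRNNCell, and the stale name left inside super() fails at construction time. A minimal reproduction, under that assumption:

import torch.nn as nn

class AttentionRNNCell(nn.Module):
    def __init__(self):
        # super(AttentionRNN, self).__init__()   # NameError: name 'AttentionRNN' is not defined
        super(AttentionRNNCell, self).__init__()  # refer to the class's current name

cell = AttentionRNNCell()  # constructs cleanly after the fix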
@@ -275,7 +275,7 @@ class Decoder(nn.Module):
             # dim=1)
             attention_rnn_hidden, current_context_vec, attention = self.attention_rnn(
                 processed_memory, current_context_vec, attention_rnn_hidden,
-                inputs, attention, input_lens)
+                inputs, attention.unsqueeze(1), input_lens)
             # attention_cum += attention
             # Concat RNN output and attention context vector
             decoder_input = self.project_to_decoder_in(
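The unsqueeze(1) pairs with the Conv1d(1, filters, ...) change above: nn.Conv1d consumes (batch, channels, time), while the alignment tensor coming out of the attention step is (batch, time), so a singleton channel axis has to be added before it reaches the location convolution. Shapes below are illustrative:

import torch

attention = torch.rand(4, 128)          # (batch, time) alignment weights
as_conv_input = attention.unsqueeze(1)  # (batch, 1, time) for Conv1d(1, filters, ...)
print(as_conv_input.shape)              # torch.Size([4, 1, 128])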
train.py

@@ -25,7 +25,7 @@ from utils.generic_utils import (Progbar, remove_experiment_folder,
 from utils.visual import plot_alignment, plot_spectrogram
 from datasets.LJSpeech import LJSpeechDataset
 from models.tacotron import Tacotron
-from layers.losses import L2LossMasked
+from layers.losses import L1LossMasked

 torch.manual_seed(1)
 use_cuda = torch.cuda.is_available()
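The import swap suggests training moved from a masked L2 criterion to a masked L1 criterion from layers.losses. A rough sketch of the idea behind a masked sequence loss follows; the function name, argument layout, and padding convention are assumptions, not the repo's exact L1LossMasked.

import torch
import torch.nn.functional as F

def l1_loss_masked(pred, target, lengths):
    """Mean L1 loss over only the valid (non-padded) frames of each sequence.

    pred, target: (batch, max_time, feat); lengths: (batch,) valid frame counts.
    """
    max_time = pred.size(1)
    # mask[b, t] = 1 while t < lengths[b], else 0; padded frames contribute nothing
    mask = (torch.arange(max_time, device=pred.device)[None, :]
            < lengths[:, None]).float().unsqueeze(-1)
    loss = F.l1_loss(pred * mask, target * mask, reduction="sum")
    return loss / (mask.sum() * pred.size(-1))   # normalize by valid element count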
@@ -338,6 +338,7 @@ def evaluate(model, criterion, criterion_st, data_loader, current_step):

 def main(args):
+
     # Setup the dataset
     train_dataset = LJSpeechDataset(os.path.join(c.data_path, 'metadata_train.csv'),
                                     os.path.join(c.data_path, 'wavs'),
utils/generic_utils.py

@@ -37,7 +37,7 @@ def get_commit_hash():

 def create_experiment_folder(root_path, model_name, debug):
     """ Create a folder with the current date and time """
-    date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I:%M%p")
+    date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p")
     if debug:
         commit_hash = 'debug'
     else:
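The strftime change replaces ':' with '+' in the experiment folder name; colons are invalid in Windows paths and awkward for tools such as scp and rsync that treat ':' as remote-host syntax. A quick illustration with a fixed timestamp:

import datetime

now = datetime.datetime(2018, 2, 13, 16, 32)
print(now.strftime("%B-%d-%Y_%I:%M%p"))  # February-13-2018_04:32PM  (':' can break paths)
print(now.strftime("%B-%d-%Y_%I+%M%p"))  # February-13-2018_04+32PM  (path-safe)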