mirror of https://github.com/coqui-ai/TTS.git
Remove pandas from dataset
This commit is contained in:
parent cef70923d1
commit 7b35454478
@@ -12,7 +12,7 @@
     "text_cleaner": "english_cleaners",
     "epochs": 2000,
-    "lr": 0.005,
+    "lr": 0.0006,
     "warmup_steps": 4000,
     "batch_size": 180,
     "r": 5,
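
For context, a minimal sketch (not code from this repo) of how a JSON config like the hunk above can be loaded and read with attribute access, matching the c.batch_size / c.num_loader_workers style used in train.py below; the file name "config.json" and the SimpleNamespace wrapper are assumptions.

import json
import types

# Load the training config and expose its fields as attributes.
# "config.json" is an assumed file name; the repo may load it differently.
with open("config.json") as f:
    c = types.SimpleNamespace(**json.load(f))

print(c.lr, c.warmup_steps, c.batch_size, c.r)
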
@@ -1,4 +1,3 @@
-import pandas as pd
 import os
 import numpy as np
 import collections
@@ -16,7 +15,10 @@ class LJSpeechDataset(Dataset):
     def __init__(self, csv_file, root_dir, outputs_per_step, sample_rate,
                  text_cleaner, num_mels, min_level_db, frame_shift_ms,
                  frame_length_ms, preemphasis, ref_level_db, num_freq, power):
-        self.frames = pd.read_csv(csv_file, sep='|', header=None)
+
+        f = open(csv_file, "r")
+        self.frames = [line.split('|') for line in f]
+        f.close()
         self.root_dir = root_dir
         self.outputs_per_step = outputs_per_step
         self.sample_rate = sample_rate
@@ -40,7 +42,7 @@ class LJSpeechDataset(Dataset):
     def __getitem__(self, idx):
         wav_name = os.path.join(self.root_dir,
                                 self.frames.ix[idx, 0]) + '.wav'
-        text = self.frames.ix[idx, 1]
+        text = self.frames[idx][1]
         text = np.asarray(text_to_sequence(text, [self.cleaners]), dtype=np.int32)
         wav = np.asarray(self.load_wav(wav_name)[0], dtype=np.float32)
         sample = {'text': text, 'wav': wav, 'item_idx': self.frames.ix[idx, 0]}
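
For context, a minimal self-contained sketch of the pandas-free metadata handling these hunks introduce. The "metadata.csv" and "wavs" paths and the strip() call are assumptions; list indexing (frames[idx][0], frames[idx][1]) stands in for the pandas .ix lookups that still appear in the context lines above.

import os

def load_frames(csv_file):
    # Each metadata line looks like "LJ001-0001|<raw text>|<normalized text>".
    with open(csv_file, "r") as f:
        return [line.strip().split('|') for line in f]

frames = load_frames("metadata.csv")                        # assumed path
idx = 0
wav_name = os.path.join("wavs", frames[idx][0]) + '.wav'    # replaces frames.ix[idx, 0]
text = frames[idx][1]                                       # replaces frames.ix[idx, 1]
print(wav_name, text[:40])
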
train.py
@@ -73,7 +73,8 @@ def main(args):
 
     dataloader = DataLoader(dataset, batch_size=c.batch_size,
                             shuffle=True, collate_fn=dataset.collate_fn,
-                            drop_last=True, num_workers=c.num_loader_workers)
+                            drop_last=True, num_workers=c.num_loader_workers,
+                            pin_memory=True)
 
     # setup the model
     model = Tacotron(c.embedding_size,
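
For context, a minimal sketch of what pin_memory=True buys: batches are collated into page-locked host memory, so the host-to-GPU copy can run asynchronously with non_blocking=True. The toy TensorDataset, batch size, and worker count are placeholders, not values from this repo.

import torch
from torch.utils.data import DataLoader, TensorDataset

# Toy stand-in for LJSpeechDataset.
dataset = TensorDataset(torch.randn(256, 80), torch.randint(0, 10, (256,)))
loader = DataLoader(dataset, batch_size=32, shuffle=True,
                    drop_last=True, num_workers=2,
                    pin_memory=True)   # collate batches into page-locked host memory

device = "cuda" if torch.cuda.is_available() else "cpu"
for features, labels in loader:
    # non_blocking=True only overlaps the copy with compute when the source tensor is pinned
    features = features.to(device, non_blocking=True)
    labels = labels.to(device, non_blocking=True)
    break
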
@@ -108,6 +109,7 @@ def main(args):
         start_epoch = checkpoint['step'] // len(dataloader)
         best_loss = checkpoint['linear_loss']
         start_epoch = 0
+        args.restore_step = checkpoint['step']
     else:
         print("\n > Starting a new training")
 
@@ -5,7 +5,7 @@ import matplotlib.pyplot as plt
 
 
 def plot_alignment(alignment, info=None):
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(16,10))
     im = ax.imshow(alignment.T, aspect='auto', origin='lower',
                    interpolation='none')
     fig.colorbar(im, ax=ax)